-1.0 [IN PROGRESS]
- - Documentation updates
- - Clean up unused files
+0.10 [IN PROGRESS]
+ - Documentation updates
+ - Clean up unused files
+ - schema/solrconfig updates for Solr 5
+ - Includes Solr 5.5.1
+ - Remove RHEL packaging
+ - Rebuild Debian packaging
0.9 Thu Oct 22 12:35:00 UTC 2015
- Includes SOLR 4.9.1
NAME=masterkey-lui-solr
-VERSION=0.9
-DEBIAN_DIST="jessie squeeze wheezy"
+VERSION=0.10
+DEBIAN_DIST="jessie wheezy"
UBUNTU_DIST=""
--- /dev/null
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [[ $EUID -ne 0 ]]; then
+ echo -e "\nERROR: This script must be run as root\n" 1>&2
+ exit 1
+fi
+
+# Print an optional error message followed by the usage/help text.
+# Arguments:
+#   $1 - error message; when non-empty it is written to stderr as
+#        "ERROR: <message>" before the usage text. Pass "" for help only.
+# Outputs: the usage text on stdout.
+# Returns: never exits; callers pick the exit status themselves.
+print_usage() {
+  # Local so that calling this helper does not leave a global behind.
+  local ERROR_MSG="$1"
+
+  if [ -n "$ERROR_MSG" ]; then
+    # printf '%s' emits the message verbatim; echo -e would interpret
+    # backslash sequences contained in user-supplied paths or option values.
+    printf '\nERROR: %s\n\n' "$ERROR_MSG" 1>&2
+  fi
+
+  echo ""
+  echo "Usage: install_solr_service.sh path_to_solr_distribution_archive OPTIONS"
+  echo ""
+  echo " The first argument to the script must be a path to a Solr distribution archive, such as solr-5.0.0.tgz"
+  echo " (only .tgz or .zip are supported formats for the archive)"
+  echo ""
+  echo " Supported OPTIONS include:"
+  echo ""
+  # The default is derived from the service name, not fixed to /var/solr.
+  echo " -d Directory for live / writable Solr files, such as logs, pid files, and index data; defaults to /var/<service name> (/var/solr unless -s is given)"
+  echo ""
+  echo " -i Directory to extract the Solr installation archive; defaults to /opt/"
+  echo " The specified path must exist prior to using this script."
+  echo ""
+  echo " -p Port Solr should bind to; default is 8983"
+  echo ""
+  echo " -s Service name; defaults to solr"
+  echo ""
+  echo " -u User to own the Solr files and run the Solr process as; defaults to solr"
+  echo " This script will create the specified user account if it does not exist."
+  echo ""
+  echo " -f Upgrade Solr. Overwrite symlink and init script of previous installation."
+  echo ""
+  echo " -help Print this usage information and exit (-usage also works)"
+  echo ""
+  echo " NOTE: Must be run as the root user"
+  echo ""
+} # end print_usage
+
+# Obtain a version banner to identify the distribution: prefer the contents
+# of /proc/version and fall back to uname output when that file is missing.
+if [ ! -f "/proc/version" ]; then
+  proc_version=$(uname -a)
+else
+  proc_version=$(cat /proc/version)
+fi
+
+# Map the banner onto a supported distribution family. Patterns are tried in
+# order and the first match wins; anything else is rejected.
+case "$proc_version" in
+  *"Debian"*)
+    distro=Debian
+    ;;
+  *"Red Hat"*)
+    distro=RedHat
+    ;;
+  *"Ubuntu"*)
+    distro=Ubuntu
+    ;;
+  *"SUSE"*)
+    distro=SUSE
+    ;;
+  *)
+    echo -e "\nERROR: Your Linux distribution ($proc_version) not supported by this script!\nYou'll need to setup Solr as a service manually using the documentation provided in the Solr Reference Guide.\n" 1>&2
+    exit 1
+    ;;
+esac
+
+# The first positional argument is mandatory: the path to the Solr archive.
+if [ -z "$1" ]; then
+  print_usage "Must specify the path to the Solr installation archive, such as solr-5.0.0.tgz"
+  exit 1
+fi
+
+SOLR_ARCHIVE=$1
+if [ ! -f "$SOLR_ARCHIVE" ]; then
+  print_usage "Specified Solr installation archive $SOLR_ARCHIVE not found!"
+  exit 1
+fi
+
+# strip off path info
+SOLR_INSTALL_FILE=${SOLR_ARCHIVE##*/}
+
+# Work out the archive format and SOLR_DIR (the file name without extension).
+# The suffix is matched with a [[ ]] pattern rather than an unquoted
+# ${SOLR_INSTALL_FILE: -4} inside [ ]: that substring is empty for names
+# shorter than four characters and is word-split when it contains whitespace,
+# and in both cases [ ] prints a syntax error before the else branch is taken.
+is_tar=true
+if [[ "$SOLR_INSTALL_FILE" == *.tgz ]]; then
+  SOLR_DIR=${SOLR_INSTALL_FILE%.tgz}
+elif [[ "$SOLR_INSTALL_FILE" == *.zip ]]; then
+  SOLR_DIR=${SOLR_INSTALL_FILE%.zip}
+  is_tar=false
+else
+  print_usage "Solr installation archive $SOLR_ARCHIVE is invalid, expected a .tgz or .zip file!"
+  exit 1
+fi
+
+# Parse the options that follow the archive path. This is only entered when
+# more than one argument was given; the initial shift drops the archive path
+# so that $1 is the first option.
+if [ $# -gt 1 ]; then
+ shift
+ while true; do
+ case $1 in
+ # Options taking a value: the value must be present and must not start
+ # with "-" (which would mean another option follows instead of a value).
+ # Each consumes two arguments.
+ -i)
+ if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+ print_usage "Directory path is required when using the $1 option!"
+ exit 1
+ fi
+ SOLR_EXTRACT_DIR=$2
+ shift 2
+ ;;
+ -d)
+ if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+ print_usage "Directory path is required when using the $1 option!"
+ exit 1
+ fi
+ SOLR_VAR_DIR="$2"
+ shift 2
+ ;;
+ -u)
+ if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+ print_usage "Username is required when using the $1 option!"
+ exit 1
+ fi
+ SOLR_USER="$2"
+ shift 2
+ ;;
+ -s)
+ if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+ print_usage "Service name is required when using the $1 option!"
+ exit 1
+ fi
+ SOLR_SERVICE="$2"
+ shift 2
+ ;;
+ -p)
+ if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+ print_usage "Port is required when using the $1 option!"
+ exit 1
+ fi
+ SOLR_PORT="$2"
+ shift 2
+ ;;
+ # Flag without a value: request an upgrade of an existing installation.
+ -f)
+ SOLR_UPGRADE="YES"
+ shift 1
+ ;;
+ # Help request: print the usage text only and exit successfully.
+ -help|-usage)
+ print_usage ""
+ exit 0
+ ;;
+ # Explicit end-of-options marker: stop parsing.
+ --)
+ shift
+ break
+ ;;
+ # Anything else is an error, except an empty $1, which is how the loop
+ # notices that all arguments have been consumed.
+ *)
+ if [ "$1" != "" ]; then
+ print_usage "Unrecognized or misplaced argument: $1!"
+ exit 1
+ else
+ break # out-of-args, stop looping
+ fi
+ ;;
+ esac
+ done
+fi
+
+if [ -z "$SOLR_EXTRACT_DIR" ]; then
+ SOLR_EXTRACT_DIR=/opt
+fi
+
+if [ ! -d "$SOLR_EXTRACT_DIR" ]; then
+ print_usage "Installation directory $SOLR_EXTRACT_DIR not found! Please create it before running this script."
+ exit 1
+fi
+
+if [ -z "$SOLR_SERVICE" ]; then
+ SOLR_SERVICE=solr
+fi
+
+if [ -z "$SOLR_VAR_DIR" ]; then
+ SOLR_VAR_DIR="/var/$SOLR_SERVICE"
+fi
+
+if [ -z "$SOLR_USER" ]; then
+ SOLR_USER=solr
+fi
+
+if [ -z "$SOLR_PORT" ]; then
+ SOLR_PORT=8983
+fi
+
+if [ -z "$SOLR_UPGRADE" ]; then
+ SOLR_UPGRADE=NO
+fi
+
+if [ ! "$SOLR_UPGRADE" = "YES" ]; then
+ if [ -f "/etc/init.d/$SOLR_SERVICE" ]; then
+ print_usage "/etc/init.d/$SOLR_SERVICE already exists! Perhaps Solr is already setup as a service on this host? To upgrade Solr use the -f option."
+ exit 1
+ fi
+
+ if [ -e "$SOLR_EXTRACT_DIR/$SOLR_SERVICE" ]; then
+ print_usage "$SOLR_EXTRACT_DIR/$SOLR_SERVICE already exists! Please move this directory / link or choose a different service name using the -s option."
+ exit 1
+ fi
+fi
+
+# stop running instance
+if [ -f "/etc/init.d/$SOLR_SERVICE" ]; then
+ echo -e "\nStopping Solr instance if exists ...\n"
+ service "$SOLR_SERVICE" stop
+fi
+
+# create user if not exists
+solr_uid="`id -u "$SOLR_USER"`"
+if [ $? -ne 0 ]; then
+ echo "Creating new user: $SOLR_USER"
+ if [ "$distro" == "RedHat" ]; then
+ adduser "$SOLR_USER"
+ elif [ "$distro" == "SUSE" ]; then
+ useradd -m "$SOLR_USER"
+ else
+ adduser --system --shell /bin/bash --group --disabled-password --home "$SOLR_VAR_DIR" "$SOLR_USER"
+ fi
+fi
+
+# extract
+# The archive is unpacked only when its target directory is not there yet.
+SOLR_INSTALL_DIR="$SOLR_EXTRACT_DIR/$SOLR_DIR"
+if [ -d "$SOLR_INSTALL_DIR" ]; then
+  echo -e "\nWARNING: $SOLR_INSTALL_DIR already exists! Skipping extract ...\n"
+else
+
+  echo -e "\nExtracting $SOLR_ARCHIVE to $SOLR_EXTRACT_DIR\n"
+
+  # is_tar holds "true" for .tgz archives and "false" for .zip archives.
+  if [ "$is_tar" = true ]; then
+    tar zxf "$SOLR_ARCHIVE" -C "$SOLR_EXTRACT_DIR"
+  else
+    unzip -q "$SOLR_ARCHIVE" -d "$SOLR_EXTRACT_DIR"
+  fi
+
+  # The archive is expected to unpack into a directory named after itself.
+  if [ ! -d "$SOLR_INSTALL_DIR" ]; then
+    echo -e "\nERROR: Expected directory $SOLR_INSTALL_DIR not found after extracting $SOLR_ARCHIVE ... script fails.\n" 1>&2
+    exit 1
+  fi
+
+  # Hand the tree to root: directories 0755, files 0644, and finally
+  # everything under bin/ 0755 (this last step must come after the 0644 pass).
+  chown -R root: "$SOLR_INSTALL_DIR"
+  find "$SOLR_INSTALL_DIR" -type d -print0 | xargs -0 chmod 0755
+  find "$SOLR_INSTALL_DIR" -type f -print0 | xargs -0 chmod 0644
+  chmod -R 0755 "$SOLR_INSTALL_DIR/bin"
+fi
+
+# create a symlink for easier scripting
+# An existing symlink is removed first so it can be re-pointed; anything else
+# found at that path is left untouched and only reported.
+if [ -L "$SOLR_EXTRACT_DIR/$SOLR_SERVICE" ]; then
+  echo -e "\nRemoving old symlink $SOLR_EXTRACT_DIR/$SOLR_SERVICE ...\n"
+  rm "$SOLR_EXTRACT_DIR/$SOLR_SERVICE"
+fi
+if [ ! -e "$SOLR_EXTRACT_DIR/$SOLR_SERVICE" ]; then
+  echo -e "\nInstalling symlink $SOLR_EXTRACT_DIR/$SOLR_SERVICE -> $SOLR_INSTALL_DIR ...\n"
+  ln -s "$SOLR_INSTALL_DIR" "$SOLR_EXTRACT_DIR/$SOLR_SERVICE"
+else
+  echo -e "\nWARNING: $SOLR_EXTRACT_DIR/$SOLR_SERVICE is not symlink! Skipping symlink update ...\n"
+fi
+
+# install init.d script
+echo -e "\nInstalling /etc/init.d/$SOLR_SERVICE script ...\n"
+cp "$SOLR_INSTALL_DIR/bin/init.d/solr" "/etc/init.d/$SOLR_SERVICE"
+chmod 0744 "/etc/init.d/$SOLR_SERVICE"
+chown root: "/etc/init.d/$SOLR_SERVICE"
+# do some basic variable substitution on the init.d script
+sed_expr1="s#SOLR_INSTALL_DIR=.*#SOLR_INSTALL_DIR=\"$SOLR_EXTRACT_DIR/$SOLR_SERVICE\"#"
+sed_expr2="s#SOLR_ENV=.*#SOLR_ENV=\"/etc/default/$SOLR_SERVICE.in.sh\"#"
+sed_expr3="s#RUNAS=.*#RUNAS=\"$SOLR_USER\"#"
+sed_expr4="s#Provides:.*#Provides: $SOLR_SERVICE#"
+sed -i -e "$sed_expr1" -e "$sed_expr2" -e "$sed_expr3" -e "$sed_expr4" "/etc/init.d/$SOLR_SERVICE"
+
+# install/move configuration
+# Make sure /etc/default exists (root-owned, mode 0755) before placing the
+# per-service environment file in it.
+if [ ! -d /etc/default ]; then
+ mkdir /etc/default
+ chown root: /etc/default
+ chmod 0755 /etc/default
+fi
+# Three cases, checked in this order:
+#  1. a solr.in.sh in the writable directory is moved to /etc/default;
+#  2. otherwise an existing /etc/default/<service>.in.sh is kept as it is;
+#  3. otherwise the template from the distribution is copied and the
+#     service-specific settings are appended to it.
+if [ -f "$SOLR_VAR_DIR/solr.in.sh" ]; then
+ echo -e "\nMoving existing $SOLR_VAR_DIR/solr.in.sh to /etc/default/$SOLR_SERVICE.in.sh ...\n"
+ mv "$SOLR_VAR_DIR/solr.in.sh" "/etc/default/$SOLR_SERVICE.in.sh"
+elif [ -f "/etc/default/$SOLR_SERVICE.in.sh" ]; then
+ echo -e "\n/etc/default/$SOLR_SERVICE.in.sh already exist. Skipping install ...\n"
+else
+ echo -e "\nInstalling /etc/default/$SOLR_SERVICE.in.sh ...\n"
+ cp "$SOLR_INSTALL_DIR/bin/solr.in.sh" "/etc/default/$SOLR_SERVICE.in.sh"
+ # The string below spans several lines on purpose: each line becomes one
+ # variable assignment appended to the environment file.
+ echo "SOLR_PID_DIR=\"$SOLR_VAR_DIR\"
+SOLR_HOME=\"$SOLR_VAR_DIR/data\"
+LOG4J_PROPS=\"$SOLR_VAR_DIR/log4j.properties\"
+SOLR_LOGS_DIR=\"$SOLR_VAR_DIR/logs\"
+SOLR_PORT=\"$SOLR_PORT\"
+" >> "/etc/default/$SOLR_SERVICE.in.sh"
+fi
+# Applied in all three cases: the environment file ends up root-owned, 0644.
+chown root: "/etc/default/$SOLR_SERVICE.in.sh"
+chmod 0644 "/etc/default/$SOLR_SERVICE.in.sh"
+
+# install data directories and files
+mkdir -p "$SOLR_VAR_DIR/data" "$SOLR_VAR_DIR/logs"
+# solr.xml and log4j.properties are seeded from the distribution only when
+# they are not already present in the writable directory.
+if [ ! -f "$SOLR_VAR_DIR/data/solr.xml" ]; then
+  cp "$SOLR_INSTALL_DIR/server/solr/solr.xml" "$SOLR_VAR_DIR/data/solr.xml"
+else
+  echo -e "\n$SOLR_VAR_DIR/data/solr.xml already exists. Skipping install ...\n"
+fi
+if [ ! -f "$SOLR_VAR_DIR/log4j.properties" ]; then
+  cp "$SOLR_INSTALL_DIR/server/resources/log4j.properties" "$SOLR_VAR_DIR/log4j.properties"
+  # Rewrite the solr.log setting to the logs directory next to Solr home.
+  sed -i -e "s#solr.log=.*#solr.log=\${solr.solr.home}/../logs#" "$SOLR_VAR_DIR/log4j.properties"
+else
+  echo -e "\n$SOLR_VAR_DIR/log4j.properties already exists. Skipping install ...\n"
+fi
+# The whole writable tree goes to the service user: directories 0750,
+# regular files 0640.
+chown -R "$SOLR_USER:" "$SOLR_VAR_DIR"
+find "$SOLR_VAR_DIR" -type d -print0 | xargs -0 chmod 0750
+find "$SOLR_VAR_DIR" -type f -print0 | xargs -0 chmod 0640
+
+# configure autostart of service
+if [[ "$distro" == "RedHat" || "$distro" == "SUSE" ]]; then
+ chkconfig "$SOLR_SERVICE" on
+else
+ update-rc.d "$SOLR_SERVICE" defaults
+fi
+
+# start service
+service "$SOLR_SERVICE" start
+sleep 5
+service "$SOLR_SERVICE" status
+
+echo "Service $SOLR_SERVICE installed."
+++ /dev/null
-#! /bin/sh
-#
-# skeleton example file to build /etc/init.d/ scripts.
-# This file should be used to construct scripts for /etc/init.d.
-#
-# Written by Miquel van Smoorenburg <miquels@cistron.nl>.
-# Modified for Debian GNU/Linux
-# by Ian Murdock <imurdock@gnu.ai.mit.edu>.
-#
-# Version: @(#)skeleton 1.8 03-Mar-1998 miquels@cistron.nl
-
-### BEGIN INIT INFO
-# Provides: indexdata-solr
-# Required-Start: $network
-# Required-Stop: $network
-# Should-Start:
-# Default-Start: 2 3 4 5
-# Default-Stop: 0 1 6
-# Short-Description: Indexdata Cloud Solr (Solr with Zookeeper)
-# Description: startes the services on port xxxx
-### END INIT INFO
-#
-
-PATH=/usr/bin
-DAEMON=/usr/share/masterkey/lui/solr4/zookeeper/zookeeper.sh
-SERVICES=""
-DESC="Index Data Services (SOLR Zookeeper)"
-NAME=indexdata-solr-zookeeper
-DEFAULT=/etc/default/indexdata-solr-zookeeper
-if [ -f "$DEFAULT" ] ; then
- . $DEFAULT
-fi
-
-# test -d $SERVICES || exit 0
-
-set -e
-
-case "$1" in
- start)
- for SERVICE in $SERVICES ; do
- if [ -d "$SERVICE" ] ; then
- echo "starting $SERVICE"
- cd $SERVICE
- $DAEMON $1
- else
- echo "No directory at $SERVICE"
- fi
- done
- ;;
- stop)
- for SERVICE in $SERVICES ; do
- if [ -d "$SERVICE" ] ; then
- echo "Stopping $SERVICE"
- cd $SERVICE
- $DAEMON stop
- else
- echo "No directory at $SERVICE"
- fi
- done
- ;;
- status)
- for SERVICE in $SERVICES ; do
- if [ -d "$SERVICE" ] ; then
- echo "checking $SERVICE"
- cd $SERVICE
- $DAEMON status
- else
- echo "No directory at $SERVICE"
- fi
- done
- ;;
- #reload)
- #
- # If the daemon can reload its config files on the fly
- # for example by sending it SIGHUP, do it here.
- #
- # If the daemon responds to changes in its config file
- # directly anyway, make this a do-nothing entry.
- #
- # echo "Reloading $DESC configuration files."
- # start-stop-daemon --stop --signal 1 --quiet --pidfile \
- # /var/run/$NAME.pid --exec $DAEMON
- #;;
- restart|force-reload)
- #
- # If the "reload" option is implemented, move the "force-reload"
- # option to the "reload" entry above. If not, "force-reload" is
- # just the same as "restart".
- #
- echo "Restarting $DESC: "
- for srv in $SERVICES ; do
- $srv restart
- done
- ;;
- *)
- N=/etc/init.d/$NAME
- # echo "Usage: $N {start|stop|restart|reload|force-reload}" >&2
- echo "Usage: $N {start|stop|restart|force-reload}" >&2
- exit 1
- ;;
-esac
-
-exit 0
+++ /dev/null
-<?xml version="1.0"?>
-<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- xmlns:pz="http://www.indexdata.com/pazpar2/1.0" >
- <xsl:template match="/">
- <add>
- <xsl:apply-templates></xsl:apply-templates>
- </add>
- </xsl:template>
-
- <xsl:template match="pz:record">
- <doc>
- <xsl:apply-templates></xsl:apply-templates>
- </doc>
- </xsl:template>
- <xsl:template match="pz:metadata">
- <xsl:if test="@type">
- <field>
- <xsl:attribute name="name">
- <xsl:value-of select="@type"/>
- </xsl:attribute>
- <xsl:value-of select="."/>
- </field>
- </xsl:if>
- </xsl:template>
-</xsl:stylesheet>
+++ /dev/null
-<?xml version="1.0" encoding="utf-8"?>
-<Context
- docBase="../solr-4.6.0.war"
->
- <Environment name="solr/home" type="java.lang.String" value="lui-solr4" override="true"/>
-</Context>
+++ /dev/null
-<?xml version="1.0" encoding="utf-8"?>
-<!-- Version 0.6.2. Adding comment to force config update -->
-<Context
- docBase="/usr/share/masterkey/lui/solr4/war/solr.war"
- path="/solr4"
->
- <Environment name="solr/home" type="java.lang.String" value="/usr/share/masterkey/lui/solr4/master" override="true"/>
-</Context>
+++ /dev/null
-<?xml version="1.0" encoding="utf-8"?>
-<!-- Version 0.6.2. Adding comment to force check -->
-<Context
- docBase="/usr/share/masterkey/lui/solr4/war/solr.war"
- path="/solr4-slave"
->
- <Environment name="solr/home" type="java.lang.String" value="/usr/share/masterkey/lui/solr4/slave" override="true"/>
-</Context>
+++ /dev/null
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-if [[ $EUID -ne 0 ]]; then
- echo -e "\nERROR: This script must be run as root\n" 1>&2
- exit 1
-fi
-
-print_usage() {
- ERROR_MSG="$1"
-
- if [ "$ERROR_MSG" != "" ]; then
- echo -e "\nERROR: $ERROR_MSG\n" 1>&2
- fi
-
- echo ""
- echo "Usage: install_solr_service.sh path_to_solr_distribution_archive OPTIONS"
- echo ""
- echo " The first argument to the script must be a path to a Solr distribution archive, such as solr-5.0.0.tgz"
- echo " (only .tgz or .zip are supported formats for the archive)"
- echo ""
- echo " Supported OPTIONS include:"
- echo ""
- echo " -d Directory for live / writable Solr files, such as logs, pid files, and index data; defaults to /var/solr"
- echo ""
- echo " -i Directory to extract the Solr installation archive; defaults to /opt/"
- echo " The specified path must exist prior to using this script."
- echo ""
- echo " -p Port Solr should bind to; default is 8983"
- echo ""
- echo " -s Service name; defaults to solr"
- echo ""
- echo " -u User to own the Solr files and run the Solr process as; defaults to solr"
- echo " This script will create the specified user account if it does not exist."
- echo ""
- echo " -f Upgrade Solr. Overwrite symlink and init script of previous installation."
- echo ""
- echo " NOTE: Must be run as the root user"
- echo ""
-} # end print_usage
-
-if [ -f "/proc/version" ]; then
- proc_version=`cat /proc/version`
-else
- proc_version=`uname -a`
-fi
-
-if [[ $proc_version == *"Debian"* ]]; then
- distro=Debian
-elif [[ $proc_version == *"Red Hat"* ]]; then
- distro=RedHat
-elif [[ $proc_version == *"Ubuntu"* ]]; then
- distro=Ubuntu
-elif [[ $proc_version == *"SUSE"* ]]; then
- distro=SUSE
-else
- echo -e "\nERROR: Your Linux distribution ($proc_version) not supported by this script!\nYou'll need to setup Solr as a service manually using the documentation provided in the Solr Reference Guide.\n" 1>&2
- exit 1
-fi
-
-if [ -z "$1" ]; then
- print_usage "Must specify the path to the Solr installation archive, such as solr-5.0.0.tgz"
- exit 1
-fi
-
-SOLR_ARCHIVE=$1
-if [ ! -f "$SOLR_ARCHIVE" ]; then
- print_usage "Specified Solr installation archive $SOLR_ARCHIVE not found!"
- exit 1
-fi
-
-# strip off path info
-SOLR_INSTALL_FILE=${SOLR_ARCHIVE##*/}
-is_tar=true
-if [ ${SOLR_INSTALL_FILE: -4} == ".tgz" ]; then
- SOLR_DIR=${SOLR_INSTALL_FILE%.tgz}
-elif [ ${SOLR_INSTALL_FILE: -4} == ".zip" ]; then
- SOLR_DIR=${SOLR_INSTALL_FILE%.zip}
- is_tar=false
-else
- print_usage "Solr installation archive $SOLR_ARCHIVE is invalid, expected a .tgz or .zip file!"
- exit 1
-fi
-
-if [ $# -gt 1 ]; then
- shift
- while true; do
- case $1 in
- -i)
- if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
- print_usage "Directory path is required when using the $1 option!"
- exit 1
- fi
- SOLR_EXTRACT_DIR=$2
- shift 2
- ;;
- -d)
- if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
- print_usage "Directory path is required when using the $1 option!"
- exit 1
- fi
- SOLR_VAR_DIR="$2"
- shift 2
- ;;
- -u)
- if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
- print_usage "Username is required when using the $1 option!"
- exit 1
- fi
- SOLR_USER="$2"
- shift 2
- ;;
- -s)
- if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
- print_usage "Service name is required when using the $1 option!"
- exit 1
- fi
- SOLR_SERVICE="$2"
- shift 2
- ;;
- -p)
- if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
- print_usage "Port is required when using the $1 option!"
- exit 1
- fi
- SOLR_PORT="$2"
- shift 2
- ;;
- -f)
- SOLR_UPGRADE="YES"
- shift 1
- ;;
- -help|-usage)
- print_usage ""
- exit 0
- ;;
- --)
- shift
- break
- ;;
- *)
- if [ "$1" != "" ]; then
- print_usage "Unrecognized or misplaced argument: $1!"
- exit 1
- else
- break # out-of-args, stop looping
- fi
- ;;
- esac
- done
-fi
-
-if [ -z "$SOLR_EXTRACT_DIR" ]; then
- SOLR_EXTRACT_DIR=/opt
-fi
-
-if [ ! -d "$SOLR_EXTRACT_DIR" ]; then
- print_usage "Installation directory $SOLR_EXTRACT_DIR not found! Please create it before running this script."
- exit 1
-fi
-
-if [ -z "$SOLR_SERVICE" ]; then
- SOLR_SERVICE=solr
-fi
-
-if [ -z "$SOLR_VAR_DIR" ]; then
- SOLR_VAR_DIR="/var/$SOLR_SERVICE"
-fi
-
-if [ -z "$SOLR_USER" ]; then
- SOLR_USER=solr
-fi
-
-if [ -z "$SOLR_PORT" ]; then
- SOLR_PORT=8983
-fi
-
-if [ -z "$SOLR_UPGRADE" ]; then
- SOLR_UPGRADE=NO
-fi
-
-if [ ! "$SOLR_UPGRADE" = "YES" ]; then
- if [ -f "/etc/init.d/$SOLR_SERVICE" ]; then
- print_usage "/etc/init.d/$SOLR_SERVICE already exists! Perhaps Solr is already setup as a service on this host? To upgrade Solr use the -f option."
- exit 1
- fi
-
- if [ -e "$SOLR_EXTRACT_DIR/$SOLR_SERVICE" ]; then
- print_usage "$SOLR_EXTRACT_DIR/$SOLR_SERVICE already exists! Please move this directory / link or choose a different service name using the -s option."
- exit 1
- fi
-fi
-
-# stop running instance
-if [ -f "/etc/init.d/$SOLR_SERVICE" ]; then
- echo -e "\nStopping Solr instance if exists ...\n"
- service "$SOLR_SERVICE" stop
-fi
-
-# create user if not exists
-solr_uid="`id -u "$SOLR_USER"`"
-if [ $? -ne 0 ]; then
- echo "Creating new user: $SOLR_USER"
- if [ "$distro" == "RedHat" ]; then
- adduser "$SOLR_USER"
- elif [ "$distro" == "SUSE" ]; then
- useradd -m "$SOLR_USER"
- else
- adduser --system --shell /bin/bash --group --disabled-password --home "$SOLR_VAR_DIR" "$SOLR_USER"
- fi
-fi
-
-# extract
-SOLR_INSTALL_DIR="$SOLR_EXTRACT_DIR/$SOLR_DIR"
-if [ ! -d "$SOLR_INSTALL_DIR" ]; then
-
- echo -e "\nExtracting $SOLR_ARCHIVE to $SOLR_EXTRACT_DIR\n"
-
- if $is_tar ; then
- tar zxf "$SOLR_ARCHIVE" -C "$SOLR_EXTRACT_DIR"
- else
- unzip -q "$SOLR_ARCHIVE" -d "$SOLR_EXTRACT_DIR"
- fi
-
- if [ ! -d "$SOLR_INSTALL_DIR" ]; then
- echo -e "\nERROR: Expected directory $SOLR_INSTALL_DIR not found after extracting $SOLR_ARCHIVE ... script fails.\n" 1>&2
- exit 1
- fi
-
- chown -R root: "$SOLR_INSTALL_DIR"
- find "$SOLR_INSTALL_DIR" -type d -print0 | xargs -0 chmod 0755
- find "$SOLR_INSTALL_DIR" -type f -print0 | xargs -0 chmod 0644
- chmod -R 0755 "$SOLR_INSTALL_DIR/bin"
-else
- echo -e "\nWARNING: $SOLR_INSTALL_DIR already exists! Skipping extract ...\n"
-fi
-
-# create a symlink for easier scripting
-if [ -h "$SOLR_EXTRACT_DIR/$SOLR_SERVICE" ]; then
- echo -e "\nRemoving old symlink $SOLR_EXTRACT_DIR/$SOLR_SERVICE ...\n"
- rm "$SOLR_EXTRACT_DIR/$SOLR_SERVICE"
-fi
-if [ -e "$SOLR_EXTRACT_DIR/$SOLR_SERVICE" ]; then
- echo -e "\nWARNING: $SOLR_EXTRACT_DIR/$SOLR_SERVICE is not symlink! Skipping symlink update ...\n"
-else
- echo -e "\nInstalling symlink $SOLR_EXTRACT_DIR/$SOLR_SERVICE -> $SOLR_INSTALL_DIR ...\n"
- ln -s "$SOLR_INSTALL_DIR" "$SOLR_EXTRACT_DIR/$SOLR_SERVICE"
-fi
-
-# install init.d script
-echo -e "\nInstalling /etc/init.d/$SOLR_SERVICE script ...\n"
-cp "$SOLR_INSTALL_DIR/bin/init.d/solr" "/etc/init.d/$SOLR_SERVICE"
-chmod 0744 "/etc/init.d/$SOLR_SERVICE"
-chown root: "/etc/init.d/$SOLR_SERVICE"
-# do some basic variable substitution on the init.d script
-sed_expr1="s#SOLR_INSTALL_DIR=.*#SOLR_INSTALL_DIR=\"$SOLR_EXTRACT_DIR/$SOLR_SERVICE\"#"
-sed_expr2="s#SOLR_ENV=.*#SOLR_ENV=\"/etc/default/$SOLR_SERVICE.in.sh\"#"
-sed_expr3="s#RUNAS=.*#RUNAS=\"$SOLR_USER\"#"
-sed_expr4="s#Provides:.*#Provides: $SOLR_SERVICE#"
-sed -i -e "$sed_expr1" -e "$sed_expr2" -e "$sed_expr3" -e "$sed_expr4" "/etc/init.d/$SOLR_SERVICE"
-
-# install/move configuration
-if [ ! -d /etc/default ]; then
- mkdir /etc/default
- chown root: /etc/default
- chmod 0755 /etc/default
-fi
-if [ -f "$SOLR_VAR_DIR/solr.in.sh" ]; then
- echo -e "\nMoving existing $SOLR_VAR_DIR/solr.in.sh to /etc/default/$SOLR_SERVICE.in.sh ...\n"
- mv "$SOLR_VAR_DIR/solr.in.sh" "/etc/default/$SOLR_SERVICE.in.sh"
-elif [ -f "/etc/default/$SOLR_SERVICE.in.sh" ]; then
- echo -e "\n/etc/default/$SOLR_SERVICE.in.sh already exist. Skipping install ...\n"
-else
- echo -e "\nInstalling /etc/default/$SOLR_SERVICE.in.sh ...\n"
- cp "$SOLR_INSTALL_DIR/bin/solr.in.sh" "/etc/default/$SOLR_SERVICE.in.sh"
- echo "SOLR_PID_DIR=\"$SOLR_VAR_DIR\"
-SOLR_HOME=\"$SOLR_VAR_DIR/data\"
-LOG4J_PROPS=\"$SOLR_VAR_DIR/log4j.properties\"
-SOLR_LOGS_DIR=\"$SOLR_VAR_DIR/logs\"
-SOLR_PORT=\"$SOLR_PORT\"
-" >> "/etc/default/$SOLR_SERVICE.in.sh"
-fi
-chown root: "/etc/default/$SOLR_SERVICE.in.sh"
-chmod 0644 "/etc/default/$SOLR_SERVICE.in.sh"
-
-# install data directories and files
-mkdir -p "$SOLR_VAR_DIR/data"
-mkdir -p "$SOLR_VAR_DIR/logs"
-if [ -f "$SOLR_VAR_DIR/data/solr.xml" ]; then
- echo -e "\n$SOLR_VAR_DIR/data/solr.xml already exists. Skipping install ...\n"
-else
- cp "$SOLR_INSTALL_DIR/server/solr/solr.xml" "$SOLR_VAR_DIR/data/solr.xml"
-fi
-if [ -f "$SOLR_VAR_DIR/log4j.properties" ]; then
- echo -e "\n$SOLR_VAR_DIR/log4j.properties already exists. Skipping install ...\n"
-else
- cp "$SOLR_INSTALL_DIR/server/resources/log4j.properties" "$SOLR_VAR_DIR/log4j.properties"
- sed_expr="s#solr.log=.*#solr.log=\${solr.solr.home}/../logs#"
- sed -i -e "$sed_expr" "$SOLR_VAR_DIR/log4j.properties"
-fi
-chown -R "$SOLR_USER:" "$SOLR_VAR_DIR"
-find "$SOLR_VAR_DIR" -type d -print0 | xargs -0 chmod 0750
-find "$SOLR_VAR_DIR" -type f -print0 | xargs -0 chmod 0640
-
-# configure autostart of service
-if [[ "$distro" == "RedHat" || "$distro" == "SUSE" ]]; then
- chkconfig "$SOLR_SERVICE" on
-else
- update-rc.d "$SOLR_SERVICE" defaults
-fi
-
-# start service
-service "$SOLR_SERVICE" start
-sleep 5
-service "$SOLR_SERVICE" status
-
-echo "Service $SOLR_SERVICE installed."
+++ /dev/null
-# Logging level
-log4j.rootLogger=INFO, file, CONSOLE
-
-log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
-
-log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
-log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x \u2013 %m%n
-
-#- size rotation with log cleanup.
-log4j.appender.file=org.apache.log4j.RollingFileAppender
-log4j.appender.file.MaxFileSize=4MB
-log4j.appender.file.MaxBackupIndex=9
-
-#- File to log to and log format
-log4j.appender.file.File=logs/solr.log
-log4j.appender.file.layout=org.apache.log4j.PatternLayout
-log4j.appender.file.layout.ConversionPattern=%-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %C; %m\n
-
-log4j.logger.org.apache.zookeeper=WARN
\ No newline at end of file
+++ /dev/null
-Summary: Local Unified Index (Solr)
-Name: masterkey-lui-solr
-Version: 0.3.0
-Release: 1.indexdata
-License: IndexData
-Group: Applications/Internet
-Vendor: Index Data ApS <info@indexdata.dk>
-Source: masterkey-lui-solr-%{version}.tar.gz
-BuildArch: noarch
-BuildRoot: %{_tmppath}/%{name}-%{version}-root
-Packager: Dennis Schafroth <dennis@indexdata.com>
-URL: http://www.indexdata.dk/masterkey
-
-%description
-The Local Unified Index is a index that contains multiple logical indexes. The underlying index is a Solr/lucene index configured to use a specific LUI schema.
-
-%package -n masterkey-lui-solr-tomcat
-Summary: Local Unified Index Solr Tomcat
-Requires: masterkey-lui-solr
-Group: Applications/Internet
-
-%package -n masterkey-lui-solr-tomcat6
-Summary: Local Unified Index Solr Tomcat 6 integration
-Requires: tomcat6 masterkey-lui-solr-tomcat
-Group: Applications/Internet
-
-#%description -n masterkey-lui-solr-initd
-#The LUI is the stand-alone Solr/Lucene Index with the LUI schema
-
-%description -n masterkey-lui-solr-tomcat
-The LUI Tomcat is part of the MasterKey suite. This package provides the LUI Tomcat context
-
-%description -n masterkey-lui-solr-tomcat6
-The LUI is part of the MasterKey suite. This package provides Tomcat 6 integration.
-
-%prep
-%setup
-%build
-
-%install
-mkdir -p ${RPM_BUILD_ROOT}/%{_datadir}/masterkey/lui/solr/war
-cp -a dist/* ${RPM_BUILD_ROOT}/%{_datadir}/masterkey/lui/solr/war
-mkdir -p ${RPM_BUILD_ROOT}/%{_datadir}/masterkey/lui/solr/conf
-cp -a solr-conf/* ${RPM_BUILD_ROOT}/%{_datadir}/masterkey/lui/solr/conf
-
-mkdir -p ${RPM_BUILD_ROOT}/%{_sysconfdir}/masterkey/lui/solr
-# cp -a etc/harvester.properties ${RPM_BUILD_ROOT}/%{_sysconfdir}/masterkey/lui/solr
-
-#mkdir -p ${RPM_BUILD_ROOT}/%{_localstatedir}/log/masterkey/lui/solr
-mkdir -p ${RPM_BUILD_ROOT}/%{_localstatedir}/lib/masterkey/lui/solr/data
-
-#tomcat context
-sed -e 's@docBase=".*"@docBase="%{_datadir}\/masterkey\/lui\/solr\/war\/apache-solr-3.3.0.war"@g' etc/solr-tomcat-context.xml > ${RPM_BUILD_ROOT}/%{_sysconfdir}/masterkey/lui/solr/lui-solr-context.xml
-
-# Solr config
-sed -e 's@${solr.data.dir:.*}@${solr.data.dir:%{_localstatedir}\/lib\/masterkey\/lui\/solr\/data}@g' solr-conf/solrconfig.xml > ${RPM_BUILD_ROOT}/%{_datadir}/masterkey/lui/solr/conf/solrconfig.xml
-
-%clean
-rm -fr ${RPM_BUILD_ROOT}
-
-%post -n masterkey-lui-solr-tomcat6
-ln -sf %{_sysconfdir}/masterkey/lui/solr/lui-solr-context.xml %{_sysconfdir}/tomcat6/Catalina/localhost/lui-solr.xml
-
-%postun -n masterkey-lui-solr-tomcat6
-if [ $1 = 0 ]; then
- rm -rf %{_sysconfdir}/tomcat6/Catalina/localhost/lui-solr.xml
-fi
-
-%files
-%defattr(755,tomcat,tomcat)
-%{_datadir}/masterkey/lui/solr
-%{_localstatedir}/lib/masterkey/lui/solr
-%{_localstatedir}/lib/masterkey/lui/solr/data
-%attr(755,tomcat,tomcat) %dir %{_localstatedir}/lib/masterkey/lui/solr
-
-%files -n masterkey-lui-solr-tomcat
-%defattr(-,root,root)
-%config %{_sysconfdir}/masterkey/lui/solr/lui-solr-context.xml
-
-%files -n masterkey-lui-solr-tomcat6
+++ /dev/null
-#!/bin/bash
-HOST_PORT=localhost:8080
-
-if [ "$2" != "" ] ; then
- HOST_PORT="$2"
-fi
-
-SOLR_PATH=solr4
-if [ "$2" != "" ] ; then
- SOLR_PATH="$2"
-fi
-
-if [ "$1" == "-h" ] ; then
- echo "$0 [[HOST:PORT] SOLR_PATH] (default $HOST_PORT/$SOLR_PATH)"
-fi
-
-curl http://$HOST:$PORT/$SOLR_PATH/update -H "Content-Type: text/xml" --data-binary '<commit waitFlush="false" waitSearcher="false" expungeDeletes="true"/>'
\ No newline at end of file
+++ /dev/null
-#!/bin/bash
-
-HOST_PORT="localhost:8080"
-
-if [ "$2" != "" ] ; then
- HOST_PORT="$2"
-fi
-
-SOLR_PATH=solr4
-if [ "$3" != "" ] ; then
- SOLR_PATH="$3"
-fi
-
-if [ "$1" == "" ] ; then
- echo "$0 databaseid [[HOST:PORT] PATH] (default $HOST_PORT/$PATH)"
- exit 1 ;
-fi
-
-curl http://${HOST_PORT}/$SOLR_PATH/update -H "Content-Type: text/xml" --data-binary "<delete><query>database:$1</query></delete>"
-curl http://${HOST_PORT}/$SOLR_PATH/update -H "Content-Type: text/xml" --data-binary '<commit waitFlush="false" waitSearcher="false"/>'
+++ /dev/null
-#!/bin/bash
-
-INSTALL_PATH=`pwd`
-CONFIG=$1
-mkdir -p ${CONFIG}
-ln -s ${INSTALL_PATH}/etc ${CONFIG}/
-ln -s ${INSTALL_PATH}/contexts ${CONFIG}/
-ln -s ${INSTALL_PATH}/lib ${CONFIG}/
-ln -s ${INSTALL_PATH}/webapps ${CONFIG}/
-ln -s ${INSTALL_PATH}/resources ${CONFIG}/
-ln -s ${INSTALL_PATH}/start.jar ${CONFIG}/
-ln -s ${INSTALL_PATH}/zookeeper.sh ${CONFIG}/
-cp -rp ${INSTALL_PATH}/solr ${CONFIG}/
-mkdir -p ${CONFIG}/logs
\ No newline at end of file
+++ /dev/null
-#!/bin/bash
-
-HOST_PORT=localhost:8080
-
-if [ "$2" != "" ] ; then
- HOST_PORT="$2"
-fi
-
-SOLR_PATH=solr4
-if [ "$3" != "" ] ; then
- SOLR_PATH="$3"
-fi
-
-if [ "$1" != "YES" ] ; then
- echo "$0 YES [[HOST:PORT] SOLR_PATH] (default $HOST_PORT/$SOLR_PATH)"
- exit 1 ;
-fi
-
-
-curl http://${HOST_PORT}/$SOLR_PATH/update -H "Content-Type: text/xml" --data-binary '<optimize waitSearcher="false" expungeDeletes="true" />'
+++ /dev/null
-#!/bin/bash
-
-NAME="indexdata-lui-solr-zookeeper"
-HOST=localhost
-PORT=8983
-SHARDS=2
-PID_FILE=/var/run/${NAME}.pid
-LOG_FILE=/var/log/${NAME}.log
-ZOOKEEPER=yes
-OPTIONS=${OPTIONS:-options}
-
-if [ -f "$OPTIONS" ]; then
- source $OPTIONS
-else
- echo "No options file ($OPTIONS). Using defaults."
-fi
-let ZKPORT=$PORT+1000
-if [ "$ZKHOSTS" == "" ] ; then
- ZKHOSTS=${HOST}:${ZKPORT}
-fi
-
-if [ "$BOOTSTRAP_CONF" != "" ] ; then
- BOOTSTRAP_OPT="-Dbootstrap_confdir=$BOOTSTRAP_CONF"
-fi
-if [ "$PORT" == "" ] ; then
- echo Port missing
-
-fi
-if [ "$ZKHOSTS" == "" ] ; then
- echo ZKHOSTS missing
-fi
-if [ "$SHARDS" == "" ] ; then
- echo SHARDS missing
-fi
-if [ "$ZOOKEEPER" == "yes" ] ; then
- ZKRUN="-DzkRun -DnumShards=${SHARDS}"
-fi
-if [ "${SOLR_HOME}" != "" ] ; then
- SOLR_HOME_OPT="-Dsolr.solr.home=${SOLR_HOME}"
-fi
-
-OPTIONS=" $SOLR_HOME_OPT -Djetty.port=$PORT ${BOOTSTRAP_OPT} -Dcollection.configName=$NAME ${ZKRUN} -DzkHost=${ZKHOSTS} "
-if [ "$1" == "start" ]; then
- if [ -f "${PID_FILE}" ] ; then
- echo "pid file ${PID_FILE} exists. Already running?"
- else
- java $OPTIONS -jar start.jar > $LOG_FILE &
- echo $! > ${PID_FILE}
- fi
-elif [ "$1" == "stop" ]; then
- if [ -f "${PID_FILE}" ] ; then
- PID="`/bin/cat ${PID_FILE}`"
- if [ "$PID" != "" ] ; then
- kill $PID
- /bin/rm ${PID_FILE}
- else
- echo "Unable to extract PID from ${PID_FILE}"
- fi
- else
- echo "No pid file ($PID_FILE) found"
- fi
-else
- echo "$0 [start|stop|status]"
-fi
+++ /dev/null
-*.log
-*.pid
-logs/
-solr/zoo_data
-solr-webapp/
\ No newline at end of file
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-Solr example
-------------
-
-This directory contains an instance of the Jetty Servlet container setup to
-run Solr using an example configuration.
-
-To run this example:
-
- java -jar start.jar
-
-in this example directory, and when Solr is started connect to
-
- http://localhost:8983/solr/
-
-To add documents to the index, use the post.jar (or post.sh script) in
-the example/exampledocs subdirectory (while Solr is running), for example:
-
- cd exampledocs
- java -jar post.jar *.xml
-Or: sh post.sh *.xml
-
-For more information about this example please read...
-
- * example/solr/README.txt
- For more information about the "Solr Home" and Solr specific configuration
- * http://lucene.apache.org/solr/tutorial.html
- For a Tutorial using this example configuration
- * http://wiki.apache.org/solr/SolrResources
- For a list of other tutorials and introductory articles.
-
-Notes About These Examples
---------------------------
-
-* SolrHome *
-
-By default, start.jar starts Solr in Jetty using the default Solr Home
-directory of "./solr/" (relative to the working directory of hte servlet
-container). To run other example configurations, you can specify the
-solr.solr.home system property when starting jetty...
-
- java -Dsolr.solr.home=multicore -jar start.jar
- java -Dsolr.solr.home=example-DIH/solr -jar start.jar
-
-* References to Jar Files Outside This Directory *
-
-Various example SolrHome dirs contained in this directory may use "<lib>"
-statements in the solrconfig.xml file to reference plugin jars outside of
-this directory for loading "contrib" plugins via relative paths.
-
-If you make a copy of this example server and wish to use the
-ExtractingRequestHandler (SolrCell), DataImportHandler (DIH), UIMA, the
-clustering component, or any other modules in "contrib", you will need to
-copy the required jars or update the paths to those jars in your
-solrconfig.xml.
-
-* Logging *
-
-By default, Jetty & Solr will log to the console a logs/solr.log. This can be convenient when
-first getting started, but eventually you will want to log just to a file. To
-configure logging, edit the log4j.properties file in "resources".
-
-It is also possible to setup log4j or other popular logging frameworks.
-
+++ /dev/null
-# Logging level
-log4j.rootLogger=INFO, stderr
-
-# log to stderr
-log4j.appender.stderr = org.apache.log4j.ConsoleAppender
-log4j.appender.stderr.Target = System.err
-log4j.appender.stderr.layout = org.apache.log4j.PatternLayout
-log4j.appender.stderr.layout.ConversionPattern=%-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %C; %m\n
+++ /dev/null
-REM You can override pass the following parameters to this script:\r
-REM \r
-\r
-set JVM=java\r
-\r
-REM Find location of this script\r
-\r
-set SDIR=%~dp0\r
-if "%SDIR:~-1%"=="\" set SDIR=%SDIR:~0,-1%\r
-\r
-"%JVM%" -Dlog4j.configuration=file:%SDIR%\log4j.properties -classpath "%SDIR%\..\solr-webapp\webapp\WEB-INF\lib\*;%SDIR%\..\lib\ext\*" org.apache.solr.cloud.ZkCLI %*\r
+++ /dev/null
-#!/usr/bin/env bash
-
-# You can override pass the following parameters to this script:
-#
-
-JVM="java"
-
-# Find location of this script
-
-sdir="`dirname \"$0\"`"
-
-PATH=$JAVA_HOME/bin:$PATH $JVM -Dlog4j.configuration=file:$sdir/log4j.properties -classpath "$sdir/../solr-webapp/webapp/WEB-INF/lib/*:$sdir/../lib/ext/*" org.apache.solr.cloud.ZkCLI ${1+"$@"}
-
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE Configure PUBLIC "-//Jetty//Configure//EN" "http://www.eclipse.org/jetty/configure.dtd">
-<Configure class="org.eclipse.jetty.webapp.WebAppContext">
- <Set name="contextPath"><SystemProperty name="hostContext" default="/solr"/></Set>
- <Set name="war"><SystemProperty name="jetty.home"/>/webapps/solr.war</Set>
- <Set name="defaultsDescriptor"><SystemProperty name="jetty.home"/>/etc/webdefault.xml</Set>
- <Set name="tempDirectory"><Property name="jetty.home" default="."/>/solr-webapp</Set>
-</Configure>
+++ /dev/null
-#!/bin/bash -ex
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-############
-
-# This script shows how the solrtest.keystore file used for solr tests
-# and these example configs was generated.
-#
-# Running this script should only be necessary if the keystore file
-# needs to be replaced, which shouldn't be required until sometime around
-# the year 4751.
-#
-# NOTE: the "-ext" option used in the "keytool" command requires that you have
-# the java7 version of keytool, but the generated key will work with any
-# version of java
-
-echo "### remove old keystore"
-rm -f solrtest.keystore
-
-echo "### create keystore and keys"
-keytool -keystore solrtest.keystore -storepass "secret" -alias solrtest -keypass "secret" -genkey -keyalg RSA -dname "cn=localhost, ou=SolrTest, o=lucene.apache.org, c=US" -ext "san=ip:127.0.0.1" -validity 999999
-
-
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE Configure PUBLIC "-//Jetty//Configure//EN" "http://www.eclipse.org/jetty/configure.dtd">
-
-<!-- =============================================================== -->
-<!-- Configure the Jetty Server -->
-<!-- -->
-<!-- Documentation of this file format can be found at: -->
-<!-- http://wiki.eclipse.org/Jetty/Reference/jetty.xml_syntax -->
-<!-- -->
-<!-- =============================================================== -->
-
-
-<Configure id="Server" class="org.eclipse.jetty.server.Server">
-
- <!-- =========================================================== -->
- <!-- Server Thread Pool -->
- <!-- =========================================================== -->
- <Set name="ThreadPool">
- <!-- Default queued blocking threadpool -->
- <New class="org.eclipse.jetty.util.thread.QueuedThreadPool">
- <Set name="minThreads">10</Set>
- <Set name="maxThreads">10000</Set>
- <Set name="detailedDump">false</Set>
- </New>
- </Set>
-
- <!-- =========================================================== -->
- <!-- Set connectors -->
- <!-- =========================================================== -->
-
- <!--
- <Call name="addConnector">
- <Arg>
- <New class="org.eclipse.jetty.server.nio.SelectChannelConnector">
- <Set name="host"><SystemProperty name="jetty.host" /></Set>
- <Set name="port"><SystemProperty name="jetty.port" default="8983"/></Set>
- <Set name="maxIdleTime">50000</Set>
- <Set name="Acceptors">2</Set>
- <Set name="statsOn">false</Set>
- <Set name="confidentialPort">8443</Set>
- <Set name="lowResourcesConnections">5000</Set>
- <Set name="lowResourcesMaxIdleTime">5000</Set>
- </New>
- </Arg>
- </Call>
- -->
-
- <!-- This connector is currently being used for Solr because it
- showed better performance than nio.SelectChannelConnector
- for typical Solr requests. -->
- <Call name="addConnector">
- <Arg>
- <New class="org.eclipse.jetty.server.bio.SocketConnector">
- <Call class="java.lang.System" name="setProperty"> <Arg>log4j.configuration</Arg> <Arg>etc/log4j.properties</Arg> </Call>
- <Set name="host"><SystemProperty name="jetty.host" /></Set>
- <Set name="port"><SystemProperty name="jetty.port" default="8983"/></Set>
- <Set name="maxIdleTime">50000</Set>
- <Set name="lowResourceMaxIdleTime">1500</Set>
- <Set name="statsOn">false</Set>
- </New>
- </Arg>
- </Call>
-
- <!-- if the connector below is uncommented, then jetty will also accept SSL
- connections on port 8984, using a self signed certificate and can
- optionally require the client to authenticate with a certificate.
- (which can be the same as the server certificate_
-
- # Run solr example with SSL on port 8984
- java -jar start.jar
- #
- # Run post.jar so that it trusts the server cert...
- java -Djavax.net.ssl.trustStore=../etc/solrtest.keystore -Durl=https://localhost:8984/solr/update -jar post.jar *.xml
-
- # Run solr example with SSL requiring client certs on port 8984
- java -Djetty.ssl.clientAuth=true -jar start.jar
- #
- # Run post.jar so that it trusts the server cert,
- # and authenticates with a client cert
- java -Djavax.net.ssl.keyStorePassword=secret -Djavax.net.ssl.keyStore=../etc/solrtest.keystore -Djavax.net.ssl.trustStore=../etc/solrtest.keystore -Durl=https://localhost:8984/solr/update -jar post.jar *.xml
-
- -->
- <!--
- <Call name="addConnector">
- <Arg>
- <New class="org.eclipse.jetty.server.ssl.SslSelectChannelConnector">
- <Arg>
- <New class="org.eclipse.jetty.http.ssl.SslContextFactory">
- <Set name="keyStore"><SystemProperty name="jetty.home" default="."/>/etc/solrtest.keystore</Set>
- <Set name="keyStorePassword">secret</Set>
- <Set name="needClientAuth"><SystemProperty name="jetty.ssl.clientAuth" default="false"/></Set>
- </New>
- </Arg>
- <Set name="port"><SystemProperty name="jetty.ssl.port" default="8984"/></Set>
- <Set name="maxIdleTime">30000</Set>
- </New>
- </Arg>
- </Call>
- -->
-
- <!-- =========================================================== -->
- <!-- Set handler Collection Structure -->
- <!-- =========================================================== -->
- <Set name="handler">
- <New id="Handlers" class="org.eclipse.jetty.server.handler.HandlerCollection">
- <Set name="handlers">
- <Array type="org.eclipse.jetty.server.Handler">
- <Item>
- <New id="Contexts" class="org.eclipse.jetty.server.handler.ContextHandlerCollection"/>
- </Item>
- <Item>
- <New id="DefaultHandler" class="org.eclipse.jetty.server.handler.DefaultHandler"/>
- </Item>
- <Item>
- <New id="RequestLog" class="org.eclipse.jetty.server.handler.RequestLogHandler"/>
- </Item>
- </Array>
- </Set>
- </New>
- </Set>
-
- <!-- =========================================================== -->
- <!-- Configure Request Log -->
- <!-- =========================================================== -->
- <!--
- <Ref id="Handlers">
- <Call name="addHandler">
- <Arg>
- <New id="RequestLog" class="org.eclipse.jetty.server.handler.RequestLogHandler">
- <Set name="requestLog">
- <New id="RequestLogImpl" class="org.eclipse.jetty.server.NCSARequestLog">
- <Set name="filename">
- logs/request.yyyy_mm_dd.log
- </Set>
- <Set name="filenameDateFormat">yyyy_MM_dd</Set>
- <Set name="retainDays">90</Set>
- <Set name="append">true</Set>
- <Set name="extended">false</Set>
- <Set name="logCookies">false</Set>
- <Set name="LogTimeZone">UTC</Set>
- </New>
- </Set>
- </New>
- </Arg>
- </Call>
- </Ref>
- -->
-
- <!-- =========================================================== -->
- <!-- extra options -->
- <!-- =========================================================== -->
- <Set name="stopAtShutdown">true</Set>
- <Set name="sendServerVersion">false</Set>
- <Set name="sendDateHeader">false</Set>
- <Set name="gracefulShutdown">1000</Set>
- <Set name="dumpAfterStart">false</Set>
- <Set name="dumpBeforeStop">false</Set>
-
-
-
-
- <Call name="addBean">
- <Arg>
- <New id="DeploymentManager" class="org.eclipse.jetty.deploy.DeploymentManager">
- <Set name="contexts">
- <Ref id="Contexts" />
- </Set>
- <Call name="setContextAttribute">
- <Arg>org.eclipse.jetty.server.webapp.ContainerIncludeJarPattern</Arg>
- <Arg>.*/servlet-api-[^/]*\.jar$</Arg>
- </Call>
-
-
- <!-- Add a customize step to the deployment lifecycle -->
- <!-- uncomment and replace DebugBinding with your extended AppLifeCycle.Binding class
- <Call name="insertLifeCycleNode">
- <Arg>deployed</Arg>
- <Arg>starting</Arg>
- <Arg>customise</Arg>
- </Call>
- <Call name="addLifeCycleBinding">
- <Arg>
- <New class="org.eclipse.jetty.deploy.bindings.DebugBinding">
- <Arg>customise</Arg>
- </New>
- </Arg>
- </Call>
- -->
-
- </New>
- </Arg>
- </Call>
-
- <Ref id="DeploymentManager">
- <Call name="addAppProvider">
- <Arg>
- <New class="org.eclipse.jetty.deploy.providers.ContextProvider">
- <Set name="monitoredDirName"><SystemProperty name="jetty.home" default="."/>/contexts</Set>
- <Set name="scanInterval">0</Set>
- </New>
- </Arg>
- </Call>
- </Ref>
-
-</Configure>
+++ /dev/null
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# To use this log config, start solr with the following system property:
-# -Djava.util.logging.config.file=etc/logging.properties
-
-## Default global logging level:
-.level = INFO
-
-## Log every update command (add, delete, commit, ...)
-#org.apache.solr.update.processor.LogUpdateProcessor.level = FINE
-
-## Where to log (space separated list).
-handlers = java.util.logging.FileHandler
-
-java.util.logging.FileHandler.level = FINE
-
-java.util.logging.FileHandler.formatter = java.util.logging.SimpleFormatter
-
-# 1 GB limit per file
-java.util.logging.FileHandler.limit = 1073741824
-
-# Log to the logs directory, with log files named solrxxx.log
-java.util.logging.FileHandler.pattern = ./logs/solr%u.log
\ No newline at end of file
+++ /dev/null
-<?xml version="1.0" encoding="ISO-8859-1"?>
-
- <!-- ===================================================================== -->
- <!-- This file contains the default descriptor for web applications. -->
- <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
- <!-- The intent of this descriptor is to include jetty specific or common -->
- <!-- configuration for all webapps. If a context has a webdefault.xml -->
- <!-- descriptor, it is applied before the contexts own web.xml file -->
- <!-- -->
- <!-- A context may be assigned a default descriptor by: -->
- <!-- + Calling WebApplicationContext.setDefaultsDescriptor -->
- <!-- + Passed an arg to addWebApplications -->
- <!-- -->
- <!-- This file is used both as the resource within the jetty.jar (which is -->
- <!-- used as the default if no explicit defaults descriptor is set) and it -->
- <!-- is copied to the etc directory of the Jetty distro and explicitly -->
- <!-- by the jetty.xml file. -->
- <!-- -->
- <!-- ===================================================================== -->
-<web-app
- xmlns="http://java.sun.com/xml/ns/javaee"
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd"
- metadata-complete="true"
- version="2.5"
->
-
- <description>
- Default web.xml file.
- This file is applied to a Web application before it's own WEB_INF/web.xml file
- </description>
-
- <!-- ==================================================================== -->
- <!-- Removes static references to beans from javax.el.BeanELResolver to -->
- <!-- ensure webapp classloader can be released on undeploy -->
- <!-- ==================================================================== -->
- <listener>
- <listener-class>org.eclipse.jetty.servlet.listener.ELContextCleaner</listener-class>
- </listener>
-
- <!-- ==================================================================== -->
- <!-- Removes static cache of Methods from java.beans.Introspector to -->
- <!-- ensure webapp classloader can be released on undeploy -->
- <!-- ==================================================================== -->
- <listener>
- <listener-class>org.eclipse.jetty.servlet.listener.IntrospectorCleaner</listener-class>
- </listener>
-
-
- <!-- ==================================================================== -->
- <!-- Context params to control Session Cookies -->
- <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
- <!--
- UNCOMMENT TO ACTIVATE <context-param> <param-name>org.eclipse.jetty.servlet.SessionDomain</param-name> <param-value>127.0.0.1</param-value> </context-param> <context-param>
- <param-name>org.eclipse.jetty.servlet.SessionPath</param-name> <param-value>/</param-value> </context-param> <context-param> <param-name>org.eclipse.jetty.servlet.MaxAge</param-name>
- <param-value>-1</param-value> </context-param>
- -->
-
- <!-- ==================================================================== -->
- <!-- The default servlet. -->
- <!-- This servlet, normally mapped to /, provides the handling for static -->
- <!-- content, OPTIONS and TRACE methods for the context. -->
- <!-- The following initParameters are supported: -->
- <!--
- * acceptRanges If true, range requests and responses are
- * supported
- *
- * dirAllowed If true, directory listings are returned if no
- * welcome file is found. Else 403 Forbidden.
- *
- * welcomeServlets If true, attempt to dispatch to welcome files
- * that are servlets, but only after no matching static
- * resources could be found. If false, then a welcome
- * file must exist on disk. If "exact", then exact
- * servlet matches are supported without an existing file.
- * Default is true.
- *
- * This must be false if you want directory listings,
- * but have index.jsp in your welcome file list.
- *
- * redirectWelcome If true, welcome files are redirected rather than
- * forwarded to.
- *
- * gzip If set to true, then static content will be served as
- * gzip content encoded if a matching resource is
- * found ending with ".gz"
- *
- * resourceBase Set to replace the context resource base
- *
- * resourceCache If set, this is a context attribute name, which the servlet
- * will use to look for a shared ResourceCache instance.
- *
- * relativeResourceBase
- * Set with a pathname relative to the base of the
- * servlet context root. Useful for only serving static content out
- * of only specific subdirectories.
- *
- * aliases If True, aliases of resources are allowed (eg. symbolic
- * links and caps variations). May bypass security constraints.
- *
- * maxCacheSize The maximum total size of the cache or 0 for no cache.
- * maxCachedFileSize The maximum size of a file to cache
- * maxCachedFiles The maximum number of files to cache
- *
- * useFileMappedBuffer
- * If set to true, it will use mapped file buffer to serve static content
- * when using NIO connector. Setting this value to false means that
- * a direct buffer will be used instead of a mapped file buffer.
- * By default, this is set to true.
- *
- * cacheControl If set, all static content will have this value set as the cache-control
- * header.
- -->
-
-
- <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
- <servlet>
- <servlet-name>default</servlet-name>
- <servlet-class>org.eclipse.jetty.servlet.DefaultServlet</servlet-class>
- <init-param>
- <param-name>aliases</param-name>
- <param-value>false</param-value>
- </init-param>
- <init-param>
- <param-name>acceptRanges</param-name>
- <param-value>true</param-value>
- </init-param>
- <init-param>
- <param-name>dirAllowed</param-name>
- <param-value>true</param-value>
- </init-param>
- <init-param>
- <param-name>welcomeServlets</param-name>
- <param-value>false</param-value>
- </init-param>
- <init-param>
- <param-name>redirectWelcome</param-name>
- <param-value>false</param-value>
- </init-param>
- <init-param>
- <param-name>maxCacheSize</param-name>
- <param-value>256000000</param-value>
- </init-param>
- <init-param>
- <param-name>maxCachedFileSize</param-name>
- <param-value>200000000</param-value>
- </init-param>
- <init-param>
- <param-name>maxCachedFiles</param-name>
- <param-value>2048</param-value>
- </init-param>
- <init-param>
- <param-name>gzip</param-name>
- <param-value>true</param-value>
- </init-param>
- <init-param>
- <param-name>useFileMappedBuffer</param-name>
- <param-value>true</param-value>
- </init-param>
- <!--
- <init-param>
- <param-name>resourceCache</param-name>
- <param-value>resourceCache</param-value>
- </init-param>
- -->
- <!--
- <init-param>
- <param-name>cacheControl</param-name>
- <param-value>max-age=3600,public</param-value>
- </init-param>
- -->
- <load-on-startup>0</load-on-startup>
- </servlet>
-
- <servlet-mapping>
- <servlet-name>default</servlet-name>
- <url-pattern>/</url-pattern>
- </servlet-mapping>
-
-
- <!-- ==================================================================== -->
- <!-- JSP Servlet -->
- <!-- This is the jasper JSP servlet from the jakarta project -->
- <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
- <!-- The JSP page compiler and execution servlet, which is the mechanism -->
- <!-- used by Glassfish to support JSP pages. Traditionally, this servlet -->
- <!-- is mapped to URL patterh "*.jsp". This servlet supports the -->
- <!-- following initialization parameters (default values are in square -->
- <!-- brackets): -->
- <!-- -->
- <!-- checkInterval If development is false and reloading is true, -->
- <!-- background compiles are enabled. checkInterval -->
- <!-- is the time in seconds between checks to see -->
- <!-- if a JSP page needs to be recompiled. [300] -->
- <!-- -->
- <!-- compiler Which compiler Ant should use to compile JSP -->
- <!-- pages. See the Ant documenation for more -->
- <!-- information. [javac] -->
- <!-- -->
- <!-- classdebuginfo Should the class file be compiled with -->
- <!-- debugging information? [true] -->
- <!-- -->
- <!-- classpath What class path should I use while compiling -->
- <!-- generated servlets? [Created dynamically -->
- <!-- based on the current web application] -->
- <!-- Set to ? to make the container explicitly set -->
- <!-- this parameter. -->
- <!-- -->
- <!-- development Is Jasper used in development mode (will check -->
- <!-- for JSP modification on every access)? [true] -->
- <!-- -->
- <!-- enablePooling Determines whether tag handler pooling is -->
- <!-- enabled [true] -->
- <!-- -->
- <!-- fork Tell Ant to fork compiles of JSP pages so that -->
- <!-- a separate JVM is used for JSP page compiles -->
- <!-- from the one Tomcat is running in. [true] -->
- <!-- -->
- <!-- ieClassId The class-id value to be sent to Internet -->
- <!-- Explorer when using <jsp:plugin> tags. -->
- <!-- [clsid:8AD9C840-044E-11D1-B3E9-00805F499D93] -->
- <!-- -->
- <!-- javaEncoding Java file encoding to use for generating java -->
- <!-- source files. [UTF-8] -->
- <!-- -->
- <!-- keepgenerated Should we keep the generated Java source code -->
- <!-- for each page instead of deleting it? [true] -->
- <!-- -->
- <!-- logVerbosityLevel The level of detailed messages to be produced -->
- <!-- by this servlet. Increasing levels cause the -->
- <!-- generation of more messages. Valid values are -->
- <!-- FATAL, ERROR, WARNING, INFORMATION, and DEBUG. -->
- <!-- [WARNING] -->
- <!-- -->
- <!-- mappedfile Should we generate static content with one -->
- <!-- print statement per input line, to ease -->
- <!-- debugging? [false] -->
- <!-- -->
- <!-- -->
- <!-- reloading Should Jasper check for modified JSPs? [true] -->
- <!-- -->
- <!-- suppressSmap Should the generation of SMAP info for JSR45 -->
- <!-- debugging be suppressed? [false] -->
- <!-- -->
- <!-- dumpSmap Should the SMAP info for JSR45 debugging be -->
- <!-- dumped to a file? [false] -->
- <!-- False if suppressSmap is true -->
- <!-- -->
- <!-- scratchdir What scratch directory should we use when -->
- <!-- compiling JSP pages? [default work directory -->
- <!-- for the current web application] -->
- <!-- -->
- <!-- tagpoolMaxSize The maximum tag handler pool size [5] -->
- <!-- -->
- <!-- xpoweredBy Determines whether X-Powered-By response -->
- <!-- header is added by generated servlet [false] -->
- <!-- -->
- <!-- If you wish to use Jikes to compile JSP pages: -->
- <!-- Set the init parameter "compiler" to "jikes". Define -->
- <!-- the property "-Dbuild.compiler.emacs=true" when starting Jetty -->
- <!-- to cause Jikes to emit error messages in a format compatible with -->
- <!-- Jasper. -->
- <!-- If you get an error reporting that jikes can't use UTF-8 encoding, -->
- <!-- try setting the init parameter "javaEncoding" to "ISO-8859-1". -->
- <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
- <servlet
- id="jsp"
- >
- <servlet-name>jsp</servlet-name>
- <servlet-class>org.apache.jasper.servlet.JspServlet</servlet-class>
- <init-param>
- <param-name>logVerbosityLevel</param-name>
- <param-value>DEBUG</param-value>
- </init-param>
- <init-param>
- <param-name>fork</param-name>
- <param-value>false</param-value>
- </init-param>
- <init-param>
- <param-name>xpoweredBy</param-name>
- <param-value>false</param-value>
- </init-param>
- <!--
- <init-param>
- <param-name>classpath</param-name>
- <param-value>?</param-value>
- </init-param>
- -->
- <load-on-startup>0</load-on-startup>
- </servlet>
-
- <servlet-mapping>
- <servlet-name>jsp</servlet-name>
- <url-pattern>*.jsp</url-pattern>
- <url-pattern>*.jspf</url-pattern>
- <url-pattern>*.jspx</url-pattern>
- <url-pattern>*.xsp</url-pattern>
- <url-pattern>*.JSP</url-pattern>
- <url-pattern>*.JSPF</url-pattern>
- <url-pattern>*.JSPX</url-pattern>
- <url-pattern>*.XSP</url-pattern>
- </servlet-mapping>
-
- <!-- ==================================================================== -->
- <!-- Dynamic Servlet Invoker. -->
- <!-- This servlet invokes anonymous servlets that have not been defined -->
- <!-- in the web.xml or by other means. The first element of the pathInfo -->
- <!-- of a request passed to the envoker is treated as a servlet name for -->
- <!-- an existing servlet, or as a class name of a new servlet. -->
- <!-- This servlet is normally mapped to /servlet/* -->
- <!-- This servlet support the following initParams: -->
- <!-- -->
- <!-- nonContextServlets If false, the invoker can only load -->
- <!-- servlets from the contexts classloader. -->
- <!-- This is false by default and setting this -->
- <!-- to true may have security implications. -->
- <!-- -->
- <!-- verbose If true, log dynamic loads -->
- <!-- -->
- <!-- * All other parameters are copied to the -->
- <!-- each dynamic servlet as init parameters -->
- <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
- <!--
- Uncomment for dynamic invocation <servlet> <servlet-name>invoker</servlet-name> <servlet-class>org.eclipse.jetty.servlet.Invoker</servlet-class> <init-param> <param-name>verbose</param-name>
- <param-value>false</param-value> </init-param> <init-param> <param-name>nonContextServlets</param-name> <param-value>false</param-value> </init-param> <init-param>
- <param-name>dynamicParam</param-name> <param-value>anyValue</param-value> </init-param> <load-on-startup>0</load-on-startup> </servlet> <servlet-mapping> <servlet-name>invoker</servlet-name>
- <url-pattern>/servlet/*</url-pattern> </servlet-mapping>
- -->
-
-
-
- <!-- ==================================================================== -->
- <session-config>
- <session-timeout>30</session-timeout>
- </session-config>
-
- <!-- ==================================================================== -->
- <!-- Default MIME mappings -->
- <!-- The default MIME mappings are provided by the mime.properties -->
- <!-- resource in the org.eclipse.jetty.server.jar file. Additional or modified -->
- <!-- mappings may be specified here -->
- <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
- <!-- UNCOMMENT TO ACTIVATE
- <mime-mapping>
- <extension>mysuffix</extension>
- <mime-type>mymime/type</mime-type>
- </mime-mapping>
- -->
-
- <!-- ==================================================================== -->
- <welcome-file-list>
- <welcome-file>index.html</welcome-file>
- <welcome-file>index.htm</welcome-file>
- <welcome-file>index.jsp</welcome-file>
- </welcome-file-list>
-
- <!-- ==================================================================== -->
- <locale-encoding-mapping-list>
- <locale-encoding-mapping>
- <locale>ar</locale>
- <encoding>ISO-8859-6</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>be</locale>
- <encoding>ISO-8859-5</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>bg</locale>
- <encoding>ISO-8859-5</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>ca</locale>
- <encoding>ISO-8859-1</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>cs</locale>
- <encoding>ISO-8859-2</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>da</locale>
- <encoding>ISO-8859-1</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>de</locale>
- <encoding>ISO-8859-1</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>el</locale>
- <encoding>ISO-8859-7</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>en</locale>
- <encoding>ISO-8859-1</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>es</locale>
- <encoding>ISO-8859-1</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>et</locale>
- <encoding>ISO-8859-1</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>fi</locale>
- <encoding>ISO-8859-1</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>fr</locale>
- <encoding>ISO-8859-1</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>hr</locale>
- <encoding>ISO-8859-2</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>hu</locale>
- <encoding>ISO-8859-2</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>is</locale>
- <encoding>ISO-8859-1</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>it</locale>
- <encoding>ISO-8859-1</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>iw</locale>
- <encoding>ISO-8859-8</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>ja</locale>
- <encoding>Shift_JIS</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>ko</locale>
- <encoding>EUC-KR</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>lt</locale>
- <encoding>ISO-8859-2</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>lv</locale>
- <encoding>ISO-8859-2</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>mk</locale>
- <encoding>ISO-8859-5</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>nl</locale>
- <encoding>ISO-8859-1</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>no</locale>
- <encoding>ISO-8859-1</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>pl</locale>
- <encoding>ISO-8859-2</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>pt</locale>
- <encoding>ISO-8859-1</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>ro</locale>
- <encoding>ISO-8859-2</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>ru</locale>
- <encoding>ISO-8859-5</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>sh</locale>
- <encoding>ISO-8859-5</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>sk</locale>
- <encoding>ISO-8859-2</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>sl</locale>
- <encoding>ISO-8859-2</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>sq</locale>
- <encoding>ISO-8859-2</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>sr</locale>
- <encoding>ISO-8859-5</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>sv</locale>
- <encoding>ISO-8859-1</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>tr</locale>
- <encoding>ISO-8859-9</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>uk</locale>
- <encoding>ISO-8859-5</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>zh</locale>
- <encoding>GB2312</encoding>
- </locale-encoding-mapping>
- <locale-encoding-mapping>
- <locale>zh_TW</locale>
- <encoding>Big5</encoding>
- </locale-encoding-mapping>
- </locale-encoding-mapping-list>
-
- <security-constraint>
- <web-resource-collection>
- <web-resource-name>Disable TRACE</web-resource-name>
- <url-pattern>/</url-pattern>
- <http-method>TRACE</http-method>
- </web-resource-collection>
- <auth-constraint/>
- </security-constraint>
-
-</web-app>
-
+++ /dev/null
-<?xml version="1.0" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- Example exchange rates file for CurrencyField type named "currency" in example schema -->
-
-<currencyConfig version="1.0">
- <rates>
- <!-- Updated from http://www.exchangerate.com/ at 2011-09-27 -->
- <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" />
- <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" />
- <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" />
- <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" />
- <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" />
- <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" />
- <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" />
- <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" />
- <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" />
- <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" />
- <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" />
- <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" />
- <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" />
- <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" />
- <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" />
- <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" />
- <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" />
- <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" />
- <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" />
- <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" />
- <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" />
- <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" />
- <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" />
- <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" />
- <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" />
- <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" />
- <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" />
- <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" />
- <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" />
- <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" />
- <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" />
- <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" />
- <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" />
- <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" />
- <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" />
- <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" />
- <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" />
-
- <!-- Cross-rates for some common currencies -->
- <rate from="EUR" to="GBP" rate="0.869914" />
- <rate from="EUR" to="NOK" rate="7.800095" />
- <rate from="GBP" to="NOK" rate="8.966508" />
- </rates>
-</currencyConfig>
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- If this file is found in the config directory, it will only be
- loaded once at startup. If it is found in Solr's data
- directory, it will be re-loaded every commit.
-
- See http://wiki.apache.org/solr/QueryElevationComponent for more info
-
--->
-<elevate>
- <query text="foo bar">
- <doc id="1" />
- <doc id="2" />
- <doc id="3" />
- </query>
-
- <query text="ipod">
- <doc id="MA147LL/A" /> <!-- put the actual ipod at the top -->
- <doc id="IW-02" exclude="true" /> <!-- exclude this cable -->
- </query>
-
-</elevate>
+++ /dev/null
-# Set of Catalan contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-l
-m
-n
-s
-t
+++ /dev/null
-# Set of French contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-l
-m
-t
-qu
-n
-s
-j
-d
-c
-jusqu
-quoiqu
-lorsqu
-puisqu
+++ /dev/null
-# Set of Irish contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-m
-b
+++ /dev/null
-# Set of Italian contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-c
-l
-all
-dall
-dell
-nell
-sull
-coll
-pell
-gl
-agl
-dagl
-degl
-negl
-sugl
-un
-m
-t
-s
-v
-d
+++ /dev/null
-# Set of Irish hyphenations for StopFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-h
-n
-t
+++ /dev/null
-# Set of overrides for the dutch stemmer
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-fiets fiets
-bromfiets bromfiets
-ei eier
-kind kinder
+++ /dev/null
-#
-# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
-#
-# Any token whose part-of-speech tag exactly matches one of those defined in this
-# file is removed from the token stream.
-#
-# Set your own stoptags by uncommenting the lines below. Note that comments are
-# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
-# etc. that can be useful for building your own stoptag set.
-#
-# The entire possible tagset is provided below for convenience.
-#
-#####
-# noun: unclassified nouns
-#名詞
-#
-# noun-common: Common nouns or nouns where the sub-classification is undefined
-#名詞-一般
-#
-# noun-proper: Proper nouns where the sub-classification is undefined
-#名詞-固有名詞
-#
-# noun-proper-misc: miscellaneous proper nouns
-#名詞-固有名詞-一般
-#
-# noun-proper-person: Personal names where the sub-classification is undefined
-#名詞-固有名詞-人名
-#
-# noun-proper-person-misc: names that cannot be divided into surname and
-# given name; foreign names; names where the surname or given name is unknown.
-# e.g. お市の方
-#名詞-固有名詞-人名-一般
-#
-# noun-proper-person-surname: Mainly Japanese surnames.
-# e.g. 山田
-#名詞-固有名詞-人名-姓
-#
-# noun-proper-person-given_name: Mainly Japanese given names.
-# e.g. 太郎
-#名詞-固有名詞-人名-名
-#
-# noun-proper-organization: Names representing organizations.
-# e.g. 通産省, NHK
-#名詞-固有名詞-組織
-#
-# noun-proper-place: Place names where the sub-classification is undefined
-#名詞-固有名詞-地域
-#
-# noun-proper-place-misc: Place names excluding countries.
-# e.g. アジア, バルセロナ, 京都
-#名詞-固有名詞-地域-一般
-#
-# noun-proper-place-country: Country names.
-# e.g. 日本, オーストラリア
-#名詞-固有名詞-地域-国
-#
-# noun-pronoun: Pronouns where the sub-classification is undefined
-#名詞-代名詞
-#
-# noun-pronoun-misc: miscellaneous pronouns:
-# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
-#名詞-代名詞-一般
-#
-# noun-pronoun-contraction: Spoken language contraction made by combining a
-# pronoun and the particle 'wa'.
-# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ
-#名詞-代名詞-縮約
-#
-# noun-adverbial: Temporal nouns such as names of days or months that behave
-# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
-# e.g. 金曜, 一月, 午後, 少量
-#名詞-副詞可能
-#
-# noun-verbal: Nouns that take arguments with case and can appear followed by
-# 'suru' and related verbs (する, できる, なさる, くださる)
-# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
-#名詞-サ変接続
-#
-# noun-adjective-base: The base form of adjectives, words that appear before な ("na")
-# e.g. 健康, 安易, 駄目, だめ
-#名詞-形容動詞語幹
-#
-# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
-# e.g. 0, 1, 2, 何, 数, 幾
-#名詞-数
-#
-# noun-affix: noun affixes where the sub-classification is undefined
-#名詞-非自立
-#
-# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that
-# attach to the base form of inflectional words, words that cannot be classified
-# into any of the other categories below. This category includes indefinite nouns.
-# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第,
-# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み,
-# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
-# わり, 割り, 割, ん-口語/, もん-口語/
-#名詞-非自立-一般
-#
-# noun-affix-adverbial: noun affixes that can behave as adverbs.
-# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ,
-# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか,
-# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所,
-# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま,
-# 儘, 侭, みぎり, 矢先
-#名詞-非自立-副詞可能
-#
-# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
-# with the stem よう(だ) ("you(da)").
-# e.g. よう, やう, 様 (よう)
-#名詞-非自立-助動詞語幹
-#
-# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
-# connection form な (aux "da").
-# e.g. みたい, ふう
-#名詞-非自立-形容動詞語幹
-#
-# noun-special: special nouns where the sub-classification is undefined.
-#名詞-特殊
-#
-# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is
-# treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the base
-# form of inflectional words.
-# e.g. そう
-#名詞-特殊-助動詞語幹
-#
-# noun-suffix: noun suffixes where the sub-classification is undefined.
-#名詞-接尾
-#
-# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
-# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
-# any of the other categories below. In general, this category is more inclusive than
-# 接尾語 ("suffix") and is usually the last element in a compound noun.
-# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み,
-# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
-#名詞-接尾-一般
-#
-# noun-suffix-person: Suffixes that form nouns and attach to person names more often
-# than other nouns.
-# e.g. 君, 様, 著
-#名詞-接尾-人名
-#
-# noun-suffix-place: Suffixes that form nouns and attach to place names more often
-# than other nouns.
-# e.g. 町, 市, 県
-#名詞-接尾-地域
-#
-# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
-# can appear before スル ("suru").
-# e.g. 化, 視, 分け, 入り, 落ち, 買い
-#名詞-接尾-サ変接続
-#
-# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions,
-# is treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the
-# conjunctive form of inflectional words.
-# e.g. そう
-#名詞-接尾-助動詞語幹
-#
-# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
-# form of inflectional words and appear before the copula だ ("da").
-# e.g. 的, げ, がち
-#名詞-接尾-形容動詞語幹
-#
-# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
-# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
-#名詞-接尾-副詞可能
-#
-# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
-# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
-# to numbers.
-# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
-#名詞-接尾-助数詞
-#
-# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
-# e.g. (楽し) さ, (考え) 方
-#名詞-接尾-特殊
-#
-# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
-# together.
-# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
-#名詞-接続詞的
-#
-# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are
-# semantically verb-like.
-# e.g. ごらん, ご覧, 御覧, 頂戴
-#名詞-動詞非自立的
-#
-# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
-# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation")
-# is いわく ("iwaku").
-#名詞-引用文字列
-#
-# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
-# behave like an adjective.
-# e.g. 申し訳, 仕方, とんでも, 違い
-#名詞-ナイ形容詞語幹
-#
-#####
-# prefix: unclassified prefixes
-#接頭詞
-#
-# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
-# excluding numerical expressions.
-# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派)
-#接頭詞-名詞接続
-#
-# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
-# in conjunctive form followed by なる/なさる/くださる.
-# e.g. お (読みなさい), お (座り)
-#接頭詞-動詞接続
-#
-# prefix-adjectival: Prefixes that attach to adjectives.
-# e.g. お (寒いですねえ), バカ (でかい)
-#接頭詞-形容詞接続
-#
-# prefix-numerical: Prefixes that attach to numerical expressions.
-# e.g. 約, およそ, 毎時
-#接頭詞-数接続
-#
-#####
-# verb: unclassified verbs
-#動詞
-#
-# verb-main:
-#動詞-自立
-#
-# verb-auxiliary:
-#動詞-非自立
-#
-# verb-suffix:
-#動詞-接尾
-#
-#####
-# adjective: unclassified adjectives
-#形容詞
-#
-# adjective-main:
-#形容詞-自立
-#
-# adjective-auxiliary:
-#形容詞-非自立
-#
-# adjective-suffix:
-#形容詞-接尾
-#
-#####
-# adverb: unclassified adverbs
-#副詞
-#
-# adverb-misc: Words that can be segmented into one unit and where adnominal
-# modification is not possible.
-# e.g. あいかわらず, 多分
-#副詞-一般
-#
-# adverb-particle_conjunction: Adverbs that can be followed by の, は, に,
-# な, する, だ, etc.
-# e.g. こんなに, そんなに, あんなに, なにか, なんでも
-#副詞-助詞類接続
-#
-#####
-# adnominal: Words that only have noun-modifying forms.
-# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう,
-# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした,
-# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
-#連体詞
-#
-#####
-# conjunction: Conjunctions that can occur independently.
-# e.g. が, けれども, そして, じゃあ, それどころか
-接続詞
-#
-#####
-# particle: unclassified particles.
-助詞
-#
-# particle-case: case particles where the subclassification is undefined.
-助詞-格助詞
-#
-# particle-case-misc: Case particles.
-# e.g. から, が, で, と, に, へ, より, を, の, にて
-助詞-格助詞-一般
-#
-# particle-case-quote: the "to" that appears after nouns, a person’s speech,
-# quotation marks, expressions of decisions from a meeting, reasons, judgements,
-# conjectures, etc.
-# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
-助詞-格助詞-引用
-#
-# particle-case-compound: Compounds of particles and verbs that mainly behave
-# like case particles.
-# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
-# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける,
-# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し,
-# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして,
-# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
-# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る,
-# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
-# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
-助詞-格助詞-連語
-#
-# particle-conjunctive:
-# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども,
-# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/,
-# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
-助詞-接続助詞
-#
-# particle-dependency:
-# e.g. こそ, さえ, しか, すら, は, も, ぞ
-助詞-係助詞
-#
-# particle-adverbial:
-# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/,
-# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
-# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに,
-# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
-# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
-助詞-副助詞
-#
-# particle-interjective: particles with interjective grammatical roles.
-# e.g. (松島) や
-助詞-間投助詞
-#
-# particle-coordinate:
-# e.g. と, たり, だの, だり, とか, なり, や, やら
-助詞-並立助詞
-#
-# particle-final:
-# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ,
-# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
-助詞-終助詞
-#
-# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
-# adverbial, conjunctive, or sentence final. For example:
-# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
-# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
-# 「(祈りが届いたせい) か (, 試験に合格した.)」
-# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
-# e.g. か
-助詞-副助詞/並立助詞/終助詞
-#
-# particle-adnominalizer: The "no" that attaches to nouns and modifies
-# non-inflectional words.
-助詞-連体化
-#
-# particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs
-# that are giongo, giseigo, or gitaigo.
-# e.g. に, と
-助詞-副詞化
-#
-# particle-special: A particle that does not fit into one of the above classifications.
-# This includes particles that are used in Tanka, Haiku, and other poetry.
-# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
-助詞-特殊
-#
-#####
-# auxiliary-verb:
-助動詞
-#
-#####
-# interjection: Greetings and other exclamations.
-# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます,
-# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
-#感動詞
-#
-#####
-# symbol: unclassified Symbols.
-記号
-#
-# symbol-misc: A general symbol not in one of the categories below.
-# e.g. [○◎@$〒→+]
-記号-一般
-#
-# symbol-comma: Commas
-# e.g. [,、]
-記号-読点
-#
-# symbol-period: Periods and full stops.
-# e.g. [..。]
-記号-句点
-#
-# symbol-space: Full-width whitespace.
-記号-空白
-#
-# symbol-open_bracket:
-# e.g. [({‘“『【]
-記号-括弧開
-#
-# symbol-close_bracket:
-# e.g. [)}’”』」】]
-記号-括弧閉
-#
-# symbol-alphabetic:
-#記号-アルファベット
-#
-#####
-# other: unclassified other
-#その他
-#
-# other-interjection: Words that are hard to classify as noun-suffixes or
-# sentence-final particles.
-# e.g. (だ)ァ
-その他-間投
-#
-#####
-# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
-# e.g. あの, うんと, えと
-フィラー
-#
-#####
-# non-verbal: non-verbal sound.
-非言語音
-#
-#####
-# fragment:
-#語断片
-#
-#####
-# unknown: unknown part of speech.
-#未知語
-#
-##### End of file
+++ /dev/null
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Cleaned on October 11, 2009 (not normalized, so use before normalization)
-# This means that when modifying this list, you might need to add some
-# redundant entries, for example containing forms with both أ and ا
-من
-ومن
-منها
-منه
-في
-وفي
-فيها
-فيه
-و
-ف
-ثم
-او
-أو
-ب
-بها
-به
-ا
-أ
-اى
-اي
-أي
-أى
-لا
-ولا
-الا
-ألا
-إلا
-لكن
-ما
-وما
-كما
-فما
-عن
-مع
-اذا
-إذا
-ان
-أن
-إن
-انها
-أنها
-إنها
-انه
-أنه
-إنه
-بان
-بأن
-فان
-فأن
-وان
-وأن
-وإن
-التى
-التي
-الذى
-الذي
-الذين
-الى
-الي
-إلى
-إلي
-على
-عليها
-عليه
-اما
-أما
-إما
-ايضا
-أيضا
-كل
-وكل
-لم
-ولم
-لن
-ولن
-هى
-هي
-هو
-وهى
-وهي
-وهو
-فهى
-فهي
-فهو
-انت
-أنت
-لك
-لها
-له
-هذه
-هذا
-تلك
-ذلك
-هناك
-كانت
-كان
-يكون
-تكون
-وكانت
-وكان
-غير
-بعض
-قد
-نحو
-بين
-بينما
-منذ
-ضمن
-حيث
-الان
-الآن
-خلال
-بعد
-قبل
-حتى
-عند
-عندما
-لدى
-جميع
+++ /dev/null
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-а
-аз
-ако
-ала
-бе
-без
-беше
-би
-бил
-била
-били
-било
-близо
-бъдат
-бъде
-бяха
-в
-вас
-ваш
-ваша
-вероятно
-вече
-взема
-ви
-вие
-винаги
-все
-всеки
-всички
-всичко
-всяка
-във
-въпреки
-върху
-г
-ги
-главно
-го
-д
-да
-дали
-до
-докато
-докога
-дори
-досега
-доста
-е
-едва
-един
-ето
-за
-зад
-заедно
-заради
-засега
-затова
-защо
-защото
-и
-из
-или
-им
-има
-имат
-иска
-й
-каза
-как
-каква
-какво
-както
-какъв
-като
-кога
-когато
-което
-които
-кой
-който
-колко
-която
-къде
-където
-към
-ли
-м
-ме
-между
-мен
-ми
-мнозина
-мога
-могат
-може
-моля
-момента
-му
-н
-на
-над
-назад
-най
-направи
-напред
-например
-нас
-не
-него
-нея
-ни
-ние
-никой
-нито
-но
-някои
-някой
-няма
-обаче
-около
-освен
-особено
-от
-отгоре
-отново
-още
-пак
-по
-повече
-повечето
-под
-поне
-поради
-после
-почти
-прави
-пред
-преди
-през
-при
-пък
-първо
-с
-са
-само
-се
-сега
-си
-скоро
-след
-сме
-според
-сред
-срещу
-сте
-съм
-със
-също
-т
-тази
-така
-такива
-такъв
-там
-твой
-те
-тези
-ти
-тн
-то
-това
-тогава
-този
-той
-толкова
-точно
-трябва
-тук
-тъй
-тя
-тях
-у
-харесва
-ч
-че
-често
-чрез
-ще
-щом
-я
+++ /dev/null
-# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
-a
-abans
-ací
-ah
-així
-això
-al
-als
-aleshores
-algun
-alguna
-algunes
-alguns
-alhora
-allà
-allí
-allò
-altra
-altre
-altres
-amb
-ambdós
-ambdues
-apa
-aquell
-aquella
-aquelles
-aquells
-aquest
-aquesta
-aquestes
-aquests
-aquí
-baix
-cada
-cadascú
-cadascuna
-cadascunes
-cadascuns
-com
-contra
-d'un
-d'una
-d'unes
-d'uns
-dalt
-de
-del
-dels
-des
-després
-dins
-dintre
-donat
-doncs
-durant
-e
-eh
-el
-els
-em
-en
-encara
-ens
-entre
-érem
-eren
-éreu
-es
-és
-esta
-està
-estàvem
-estaven
-estàveu
-esteu
-et
-etc
-ets
-fins
-fora
-gairebé
-ha
-han
-has
-havia
-he
-hem
-heu
-hi
-ho
-i
-igual
-iguals
-ja
-l'hi
-la
-les
-li
-li'n
-llavors
-m'he
-ma
-mal
-malgrat
-mateix
-mateixa
-mateixes
-mateixos
-me
-mentre
-més
-meu
-meus
-meva
-meves
-molt
-molta
-moltes
-molts
-mon
-mons
-n'he
-n'hi
-ne
-ni
-no
-nogensmenys
-només
-nosaltres
-nostra
-nostre
-nostres
-o
-oh
-oi
-on
-pas
-pel
-pels
-per
-però
-perquè
-poc
-poca
-pocs
-poques
-potser
-propi
-qual
-quals
-quan
-quant
-que
-què
-quelcom
-qui
-quin
-quina
-quines
-quins
-s'ha
-s'han
-sa
-semblant
-semblants
-ses
-seu
-seus
-seva
-seva
-seves
-si
-sobre
-sobretot
-sóc
-solament
-sols
-son
-són
-sons
-sota
-sou
-t'ha
-t'han
-t'he
-ta
-tal
-també
-tampoc
-tan
-tant
-tanta
-tantes
-teu
-teus
-teva
-teves
-ton
-tons
-tot
-tota
-totes
-tots
-un
-una
-unes
-uns
-us
-va
-vaig
-vam
-van
-vas
-veu
-vosaltres
-vostra
-vostre
-vostres
+++ /dev/null
-a
-s
-k
-o
-i
-u
-v
-z
-dnes
-cz
-tímto
-budeš
-budem
-byli
-jseš
-můj
-svým
-ta
-tomto
-tohle
-tuto
-tyto
-jej
-zda
-proč
-máte
-tato
-kam
-tohoto
-kdo
-kteří
-mi
-nám
-tom
-tomuto
-mít
-nic
-proto
-kterou
-byla
-toho
-protože
-asi
-ho
-naši
-napište
-re
-což
-tím
-takže
-svých
-její
-svými
-jste
-aj
-tu
-tedy
-teto
-bylo
-kde
-ke
-pravé
-ji
-nad
-nejsou
-či
-pod
-téma
-mezi
-přes
-ty
-pak
-vám
-ani
-když
-však
-neg
-jsem
-tento
-článku
-články
-aby
-jsme
-před
-pta
-jejich
-byl
-ještě
-až
-bez
-také
-pouze
-první
-vaše
-která
-nás
-nový
-tipy
-pokud
-může
-strana
-jeho
-své
-jiné
-zprávy
-nové
-není
-vás
-jen
-podle
-zde
-už
-být
-více
-bude
-již
-než
-který
-by
-které
-co
-nebo
-ten
-tak
-má
-při
-od
-po
-jsou
-jak
-další
-ale
-si
-se
-ve
-to
-jako
-za
-zpět
-ze
-do
-pro
-je
-na
-atd
-atp
-jakmile
-přičemž
-já
-on
-ona
-ono
-oni
-ony
-my
-vy
-jí
-ji
-mě
-mne
-jemu
-tomu
-těm
-těmu
-němu
-němuž
-jehož
-jíž
-jelikož
-jež
-jakož
-načež
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Danish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
-
-og | and
-i | in
-jeg | I
-det | that (dem. pronoun)/it (pers. pronoun)
-at | that (in front of a sentence)/to (with infinitive)
-en | a/an
-den | it (pers. pronoun)/that (dem. pronoun)
-til | to/at/for/until/against/by/of/into, more
-er | present tense of "to be"
-som | who, as
-på | on/upon/in/on/at/to/after/of/with/for, on
-de | they
-med | with/by/in, along
-han | he
-af | of/by/from/off/for/in/with/on, off
-for | at/for/to/from/by/of/ago, in front/before, because
-ikke | not
-der | who/which, there/those
-var | past tense of "to be"
-mig | me/myself
-sig | oneself/himself/herself/itself/themselves
-men | but
-et | a/an/one, one (number), someone/somebody/one
-har | present tense of "to have"
-om | round/about/for/in/a, about/around/down, if
-vi | we
-min | my
-havde | past tense of "to have"
-ham | him
-hun | she
-nu | now
-over | over/above/across/by/beyond/past/on/about, over/past
-da | then, when/as/since
-fra | from/off/since, off, since
-du | you
-ud | out
-sin | his/her/its/one's
-dem | them
-os | us/ourselves
-op | up
-man | you/one
-hans | his
-hvor | where
-eller | or
-hvad | what
-skal | must/shall etc.
-selv | myself/yourself/herself/ourselves etc., even
-her | here
-alle | all/everyone/everybody etc.
-vil | will (verb)
-blev | past tense of "to stay/to remain/to get/to become"
-kunne | could
-ind | in
-når | when
-være | present tense of "to be"
-dog | however/yet/after all
-noget | something
-ville | would
-jo | you know/you see (adv), yes
-deres | their/theirs
-efter | after/behind/according to/for/by/from, later/afterwards
-ned | down
-skulle | should
-denne | this
-end | than
-dette | this
-mit | my/mine
-også | also
-under | under/beneath/below/during, below/underneath
-have | have
-dig | you
-anden | other
-hende | her
-mine | my
-alt | everything
-meget | much/very, plenty of
-sit | his, her, its, one's
-sine | his, her, its, one's
-vor | our
-mod | against
-disse | these
-hvis | if
-din | your/yours
-nogle | some
-hos | by/at
-blive | be/become
-mange | many
-ad | by/through
-bliver | present tense of "to be/to become"
-hendes | her/hers
-været | be
-thi | for (conj)
-jer | you
-sådan | such, like this/like that
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A German stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | The number of forms in this list is reduced significantly by passing it
- | through the German stemmer.
-
-
-aber | but
-
-alle | all
-allem
-allen
-aller
-alles
-
-als | than, as
-also | so
-am | an + dem
-an | at
-
-ander | other
-andere
-anderem
-anderen
-anderer
-anderes
-anderm
-andern
-anderr
-anders
-
-auch | also
-auf | on
-aus | out of
-bei | by
-bin | am
-bis | until
-bist | art
-da | there
-damit | with it
-dann | then
-
-der | the
-den
-des
-dem
-die
-das
-
-daß | that
-
-derselbe | the same
-derselben
-denselben
-desselben
-demselben
-dieselbe
-dieselben
-dasselbe
-
-dazu | to that
-
-dein | thy
-deine
-deinem
-deinen
-deiner
-deines
-
-denn | because
-
-derer | of those
-dessen | of him
-
-dich | thee
-dir | to thee
-du | thou
-
-dies | this
-diese
-diesem
-diesen
-dieser
-dieses
-
-
-doch | (several meanings)
-dort | (over) there
-
-
-durch | through
-
-ein | a
-eine
-einem
-einen
-einer
-eines
-
-einig | some
-einige
-einigem
-einigen
-einiger
-einiges
-
-einmal | once
-
-er | he
-ihn | him
-ihm | to him
-
-es | it
-etwas | something
-
-euer | your
-eure
-eurem
-euren
-eurer
-eures
-
-für | for
-gegen | towards
-gewesen | p.p. of sein
-hab | have
-habe | have
-haben | have
-hat | has
-hatte | had
-hatten | had
-hier | here
-hin | there
-hinter | behind
-
-ich | I
-mich | me
-mir | to me
-
-
-ihr | you, to her
-ihre
-ihrem
-ihren
-ihrer
-ihres
-euch | to you
-
-im | in + dem
-in | in
-indem | while
-ins | in + das
-ist | is
-
-jede | each, every
-jedem
-jeden
-jeder
-jedes
-
-jene | that
-jenem
-jenen
-jener
-jenes
-
-jetzt | now
-kann | can
-
-kein | no
-keine
-keinem
-keinen
-keiner
-keines
-
-können | can
-könnte | could
-machen | do
-man | one
-
-manche | some, many a
-manchem
-manchen
-mancher
-manches
-
-mein | my
-meine
-meinem
-meinen
-meiner
-meines
-
-mit | with
-muss | must
-musste | had to
-nach | to(wards)
-nicht | not
-nichts | nothing
-noch | still, yet
-nun | now
-nur | only
-ob | whether
-oder | or
-ohne | without
-sehr | very
-
-sein | his
-seine
-seinem
-seinen
-seiner
-seines
-
-selbst | self
-sich | herself
-
-sie | they, she
-ihnen | to them
-
-sind | are
-so | so
-
-solche | such
-solchem
-solchen
-solcher
-solches
-
-soll | shall
-sollte | should
-sondern | but
-sonst | else
-über | over
-um | about, around
-und | and
-
-uns | us
-unse
-unsem
-unsen
-unser
-unses
-
-unter | under
-viel | much
-vom | von + dem
-von | from
-vor | before
-während | while
-war | was
-waren | were
-warst | wast
-was | what
-weg | away, off
-weil | because
-weiter | further
-
-welche | which
-welchem
-welchen
-welcher
-welches
-
-wenn | when
-werde | will
-werden | will
-wie | how
-wieder | again
-will | want
-wir | we
-wird | will
-wirst | willst
-wo | where
-wollen | want
-wollte | wanted
-würde | would
-würden | would
-zu | to
-zum | zu + dem
-zur | zu + der
-zwar | indeed
-zwischen | between
-
+++ /dev/null
-# Lucene Greek Stopwords list
-# Note: by default this file is used after GreekLowerCaseFilter,
-# so when modifying this file use 'σ' instead of 'ς'
-ο
-η
-το
-οι
-τα
-του
-τησ
-των
-τον
-την
-και
-κι
-κ
-ειμαι
-εισαι
-ειναι
-ειμαστε
-ειστε
-στο
-στον
-στη
-στην
-μα
-αλλα
-απο
-για
-προσ
-με
-σε
-ωσ
-παρα
-αντι
-κατα
-μετα
-θα
-να
-δε
-δεν
-μη
-μην
-επι
-ενω
-εαν
-αν
-τοτε
-που
-πωσ
-ποιοσ
-ποια
-ποιο
-ποιοι
-ποιεσ
-ποιων
-ποιουσ
-αυτοσ
-αυτη
-αυτο
-αυτοι
-αυτων
-αυτουσ
-αυτεσ
-αυτα
-εκεινοσ
-εκεινη
-εκεινο
-εκεινοι
-εκεινεσ
-εκεινα
-εκεινων
-εκεινουσ
-οπωσ
-ομωσ
-ισωσ
-οσο
-οτι
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# a couple of test stopwords to test that the words are really being
-# configured from this file:
-stopworda
-stopwordb
-
-# Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-such
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Spanish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | from, of
-la | the, her
-que | who, that
-el | the
-en | in
-y | and
-a | to
-los | the, them
-del | de + el
-se | himself, from him etc
-las | the, them
-por | for, by, etc
-un | a
-para | for
-con | with
-no | no
-una | a
-su | his, her
-al | a + el
- | es from SER
-lo | him
-como | how
-más | more
-pero | pero
-sus | su plural
-le | to him, her
-ya | already
-o | or
- | fue from SER
-este | this
- | ha from HABER
-sí | himself etc
-porque | because
-esta | this
- | son from SER
-entre | between
- | está from ESTAR
-cuando | when
-muy | very
-sin | without
-sobre | on
- | ser from SER
- | tiene from TENER
-también | also
-me | me
-hasta | until
-hay | there is/are
-donde | where
- | han from HABER
-quien | whom, that
- | están from ESTAR
- | estado from ESTAR
-desde | from
-todo | all
-nos | us
-durante | during
- | estados from ESTAR
-todos | all
-uno | a
-les | to them
-ni | nor
-contra | against
-otros | other
- | fueron from SER
-ese | that
-eso | that
- | había from HABER
-ante | before
-ellos | they
-e | and (variant of y)
-esto | this
-mí | me
-antes | before
-algunos | some
-qué | what?
-unos | a
-yo | I
-otro | other
-otras | other
-otra | other
-él | he
-tanto | so much, many
-esa | that
-estos | these
-mucho | much, many
-quienes | who
-nada | nothing
-muchos | many
-cual | who
- | sea from SER
-poco | few
-ella | she
-estar | to be
- | haber from HABER
-estas | these
- | estaba from ESTAR
- | estamos from ESTAR
-algunas | some
-algo | something
-nosotros | we
-
- | other forms
-
-mi | me
-mis | mi plural
-tú | thou
-te | thee
-ti | thee
-tu | thy
-tus | tu plural
-ellas | they
-nosotras | we
-vosotros | you
-vosotras | you
-os | you
-mío | mine
-mía |
-míos |
-mías |
-tuyo | thine
-tuya |
-tuyos |
-tuyas |
-suyo | his, hers, theirs
-suya |
-suyos |
-suyas |
-nuestro | ours
-nuestra |
-nuestros |
-nuestras |
-vuestro | yours
-vuestra |
-vuestros |
-vuestras |
-esos | those
-esas | those
-
- | forms of estar, to be (not including the infinitive):
-estoy
-estás
-está
-estamos
-estáis
-están
-esté
-estés
-estemos
-estéis
-estén
-estaré
-estarás
-estará
-estaremos
-estaréis
-estarán
-estaría
-estarías
-estaríamos
-estaríais
-estarían
-estaba
-estabas
-estábamos
-estabais
-estaban
-estuve
-estuviste
-estuvo
-estuvimos
-estuvisteis
-estuvieron
-estuviera
-estuvieras
-estuviéramos
-estuvierais
-estuvieran
-estuviese
-estuvieses
-estuviésemos
-estuvieseis
-estuviesen
-estando
-estado
-estada
-estados
-estadas
-estad
-
- | forms of haber, to have (not including the infinitive):
-he
-has
-ha
-hemos
-habéis
-han
-haya
-hayas
-hayamos
-hayáis
-hayan
-habré
-habrás
-habrá
-habremos
-habréis
-habrán
-habría
-habrías
-habríamos
-habríais
-habrían
-había
-habías
-habíamos
-habíais
-habían
-hube
-hubiste
-hubo
-hubimos
-hubisteis
-hubieron
-hubiera
-hubieras
-hubiéramos
-hubierais
-hubieran
-hubiese
-hubieses
-hubiésemos
-hubieseis
-hubiesen
-habiendo
-habido
-habida
-habidos
-habidas
-
- | forms of ser, to be (not including the infinitive):
-soy
-eres
-es
-somos
-sois
-son
-sea
-seas
-seamos
-seáis
-sean
-seré
-serás
-será
-seremos
-seréis
-serán
-sería
-serías
-seríamos
-seríais
-serían
-era
-eras
-éramos
-erais
-eran
-fui
-fuiste
-fue
-fuimos
-fuisteis
-fueron
-fuera
-fueras
-fuéramos
-fuerais
-fueran
-fuese
-fueses
-fuésemos
-fueseis
-fuesen
-siendo
-sido
- | sed also means 'thirst'
-
- | forms of tener, to have (not including the infinitive):
-tengo
-tienes
-tiene
-tenemos
-tenéis
-tienen
-tenga
-tengas
-tengamos
-tengáis
-tengan
-tendré
-tendrás
-tendrá
-tendremos
-tendréis
-tendrán
-tendría
-tendrías
-tendríamos
-tendríais
-tendrían
-tenía
-tenías
-teníamos
-teníais
-tenían
-tuve
-tuviste
-tuvo
-tuvimos
-tuvisteis
-tuvieron
-tuviera
-tuvieras
-tuviéramos
-tuvierais
-tuvieran
-tuviese
-tuvieses
-tuviésemos
-tuvieseis
-tuviesen
-teniendo
-tenido
-tenida
-tenidos
-tenidas
-tened
-
+++ /dev/null
-# example set of basque stopwords
-al
-anitz
-arabera
-asko
-baina
-bat
-batean
-batek
-bati
-batzuei
-batzuek
-batzuetan
-batzuk
-bera
-beraiek
-berau
-berauek
-bere
-berori
-beroriek
-beste
-bezala
-da
-dago
-dira
-ditu
-du
-dute
-edo
-egin
-ere
-eta
-eurak
-ez
-gainera
-gu
-gutxi
-guzti
-haiei
-haiek
-haietan
-hainbeste
-hala
-han
-handik
-hango
-hara
-hari
-hark
-hartan
-hau
-hauei
-hauek
-hauetan
-hemen
-hemendik
-hemengo
-hi
-hona
-honek
-honela
-honetan
-honi
-hor
-hori
-horiei
-horiek
-horietan
-horko
-horra
-horrek
-horrela
-horretan
-horri
-hortik
-hura
-izan
-ni
-noiz
-nola
-non
-nondik
-nongo
-nor
-nora
-ze
-zein
-zen
-zenbait
-zenbat
-zer
-zergatik
-ziren
-zituen
-zu
-zuek
-zuen
-zuten
+++ /dev/null
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Note: by default this file is used after normalization, so when adding entries
-# to this file, use the arabic 'ي' instead of 'ی'
-انان
-نداشته
-سراسر
-خياه
-ايشان
-وي
-تاكنون
-بيشتري
-دوم
-پس
-ناشي
-وگو
-يا
-داشتند
-سپس
-هنگام
-هرگز
-پنج
-نشان
-امسال
-ديگر
-گروهي
-شدند
-چطور
-ده
-و
-دو
-نخستين
-ولي
-چرا
-چه
-وسط
-ه
-كدام
-قابل
-يك
-رفت
-هفت
-همچنين
-در
-هزار
-بله
-بلي
-شايد
-اما
-شناسي
-گرفته
-دهد
-داشته
-دانست
-داشتن
-خواهيم
-ميليارد
-وقتيكه
-امد
-خواهد
-جز
-اورده
-شده
-بلكه
-خدمات
-شدن
-برخي
-نبود
-بسياري
-جلوگيري
-حق
-كردند
-نوعي
-بعري
-نكرده
-نظير
-نبايد
-بوده
-بودن
-داد
-اورد
-هست
-جايي
-شود
-دنبال
-داده
-بايد
-سابق
-هيچ
-همان
-انجا
-كمتر
-كجاست
-گردد
-كسي
-تر
-مردم
-تان
-دادن
-بودند
-سري
-جدا
-ندارند
-مگر
-يكديگر
-دارد
-دهند
-بنابراين
-هنگامي
-سمت
-جا
-انچه
-خود
-دادند
-زياد
-دارند
-اثر
-بدون
-بهترين
-بيشتر
-البته
-به
-براساس
-بيرون
-كرد
-بعضي
-گرفت
-توي
-اي
-ميليون
-او
-جريان
-تول
-بر
-مانند
-برابر
-باشيم
-مدتي
-گويند
-اكنون
-تا
-تنها
-جديد
-چند
-بي
-نشده
-كردن
-كردم
-گويد
-كرده
-كنيم
-نمي
-نزد
-روي
-قصد
-فقط
-بالاي
-ديگران
-اين
-ديروز
-توسط
-سوم
-ايم
-دانند
-سوي
-استفاده
-شما
-كنار
-داريم
-ساخته
-طور
-امده
-رفته
-نخست
-بيست
-نزديك
-طي
-كنيد
-از
-انها
-تمامي
-داشت
-يكي
-طريق
-اش
-چيست
-روب
-نمايد
-گفت
-چندين
-چيزي
-تواند
-ام
-ايا
-با
-ان
-ايد
-ترين
-اينكه
-ديگري
-راه
-هايي
-بروز
-همچنان
-پاعين
-كس
-حدود
-مختلف
-مقابل
-چيز
-گيرد
-ندارد
-ضد
-همچون
-سازي
-شان
-مورد
-باره
-مرسي
-خويش
-برخوردار
-چون
-خارج
-شش
-هنوز
-تحت
-ضمن
-هستيم
-گفته
-فكر
-بسيار
-پيش
-براي
-روزهاي
-انكه
-نخواهد
-بالا
-كل
-وقتي
-كي
-چنين
-كه
-گيري
-نيست
-است
-كجا
-كند
-نيز
-يابد
-بندي
-حتي
-توانند
-عقب
-خواست
-كنند
-بين
-تمام
-همه
-ما
-باشند
-مثل
-شد
-اري
-باشد
-اره
-طبق
-بعد
-اگر
-صورت
-غير
-جاي
-بيش
-ريزي
-اند
-زيرا
-چگونه
-بار
-لطفا
-مي
-درباره
-من
-ديده
-همين
-گذاري
-برداري
-علت
-گذاشته
-هم
-فوق
-نه
-ها
-شوند
-اباد
-همواره
-هر
-اول
-خواهند
-چهار
-نام
-امروز
-مان
-هاي
-قبل
-كنم
-سعي
-تازه
-را
-هستند
-زير
-جلوي
-عنوان
-بود
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| forms of BE
-
-olla
-olen
-olet
-on
-olemme
-olette
-ovat
-ole | negative form
-
-oli
-olisi
-olisit
-olisin
-olisimme
-olisitte
-olisivat
-olit
-olin
-olimme
-olitte
-olivat
-ollut
-olleet
-
-en | negation
-et
-ei
-emme
-ette
-eivät
-
-|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
-minä minun minut minua minussa minusta minuun minulla minulta minulle | I
-sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
-hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
-me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
-te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
-he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
-
-tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
-tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
-se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
-nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
-nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
-ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
-
-kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
-ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
-mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
-mitkä | (pl)
-
-joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
-jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
-
-| conjunctions
-
-että | that
-ja | and
-jos | if
-koska | because
-kuin | than
-mutta | but
-niin | so
-sekä | and
-sillä | for
-tai | or
-vaan | but
-vai | or
-vaikka | although
-
-
-| prepositions
-
-kanssa | with
-mukaan | according to
-noin | about
-poikki | across
-yli | over, across
-
-| other
-
-kun | when
-niin | so
-nyt | now
-itse | self
-
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A French stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-au | a + le
-aux | a + les
-avec | with
-ce | this
-ces | these
-dans | with
-de | of
-des | de + les
-du | de + le
-elle | she
-en | `of them' etc
-et | and
-eux | them
-il | he
-je | I
-la | the
-le | the
-leur | their
-lui | him
-ma | my (fem)
-mais | but
-me | me
-même | same; as in moi-même (myself) etc
-mes | me (pl)
-moi | me
-mon | my (masc)
-ne | not
-nos | our (pl)
-notre | our
-nous | we
-on | one
-ou | where
-par | by
-pas | not
-pour | for
-qu | que before vowel
-que | that
-qui | who
-sa | his, her (fem)
-se | oneself
-ses | his (pl)
-son | his, her (masc)
-sur | on
-ta | thy (fem)
-te | thee
-tes | thy (pl)
-toi | thee
-ton | thy (masc)
-tu | thou
-un | a
-une | a
-vos | your (pl)
-votre | your
-vous | you
-
- | single letter forms
-
-c | c'
-d | d'
-j | j'
-l | l'
-à | to, at
-m | m'
-n | n'
-s | s'
-t | t'
-y | there
-
- | forms of être (not including the infinitive):
-été
-étée
-étées
-étés
-étant
-suis
-es
-est
-sommes
-êtes
-sont
-serai
-seras
-sera
-serons
-serez
-seront
-serais
-serait
-serions
-seriez
-seraient
-étais
-était
-étions
-étiez
-étaient
-fus
-fut
-fûmes
-fûtes
-furent
-sois
-soit
-soyons
-soyez
-soient
-fusse
-fusses
-fût
-fussions
-fussiez
-fussent
-
- | forms of avoir (not including the infinitive):
-ayant
-eu
-eue
-eues
-eus
-ai
-as
-avons
-avez
-ont
-aurai
-auras
-aura
-aurons
-aurez
-auront
-aurais
-aurait
-aurions
-auriez
-auraient
-avais
-avait
-avions
-aviez
-avaient
-eut
-eûmes
-eûtes
-eurent
-aie
-aies
-ait
-ayons
-ayez
-aient
-eusse
-eusses
-eût
-eussions
-eussiez
-eussent
-
- | Later additions (from Jean-Christophe Deschamps)
-ceci | this
-cela | that
-celà | that
-cet | this
-cette | this
-ici | here
-ils | they
-les | the (pl)
-leurs | their (pl)
-quel | which
-quels | which
-quelle | which
-quelles | which
-sans | without
-soi | oneself
-
+++ /dev/null
-
-a
-ach
-ag
-agus
-an
-aon
-ar
-arna
-as
-b'
-ba
-beirt
-bhúr
-caoga
-ceathair
-ceathrar
-chomh
-chtó
-chuig
-chun
-cois
-céad
-cúig
-cúigear
-d'
-daichead
-dar
-de
-deich
-deichniúr
-den
-dhá
-do
-don
-dtí
-dá
-dár
-dó
-faoi
-faoin
-faoina
-faoinár
-fara
-fiche
-gach
-gan
-go
-gur
-haon
-hocht
-i
-iad
-idir
-in
-ina
-ins
-inár
-is
-le
-leis
-lena
-lenár
-m'
-mar
-mo
-mé
-na
-nach
-naoi
-naonúr
-ná
-ní
-níor
-nó
-nócha
-ocht
-ochtar
-os
-roimh
-sa
-seacht
-seachtar
-seachtó
-seasca
-seisear
-siad
-sibh
-sinn
-sna
-sé
-sí
-tar
-thar
-thú
-triúr
-trí
-trína
-trínár
-tríocha
-tú
-um
-ár
-é
-éis
-í
-ó
-ón
-óna
-ónár
+++ /dev/null
-# galican stopwords
-a
-aínda
-alí
-aquel
-aquela
-aquelas
-aqueles
-aquilo
-aquí
-ao
-aos
-as
-así
-á
-ben
-cando
-che
-co
-coa
-comigo
-con
-connosco
-contigo
-convosco
-coas
-cos
-cun
-cuns
-cunha
-cunhas
-da
-dalgunha
-dalgunhas
-dalgún
-dalgúns
-das
-de
-del
-dela
-delas
-deles
-desde
-deste
-do
-dos
-dun
-duns
-dunha
-dunhas
-e
-el
-ela
-elas
-eles
-en
-era
-eran
-esa
-esas
-ese
-eses
-esta
-estar
-estaba
-está
-están
-este
-estes
-estiven
-estou
-eu
-é
-facer
-foi
-foron
-fun
-había
-hai
-iso
-isto
-la
-las
-lle
-lles
-lo
-los
-mais
-me
-meu
-meus
-min
-miña
-miñas
-moi
-na
-nas
-neste
-nin
-no
-non
-nos
-nosa
-nosas
-noso
-nosos
-nós
-nun
-nunha
-nuns
-nunhas
-o
-os
-ou
-ó
-ós
-para
-pero
-pode
-pois
-pola
-polas
-polo
-polos
-por
-que
-se
-senón
-ser
-seu
-seus
-sexa
-sido
-sobre
-súa
-súas
-tamén
-tan
-te
-ten
-teñen
-teño
-ter
-teu
-teus
-ti
-tido
-tiña
-tiven
-túa
-túas
-un
-unha
-unhas
-uns
-vos
-vosa
-vosas
-voso
-vosos
-vós
+++ /dev/null
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# Note: by default this file also contains forms normalized by HindiNormalizer
-# for spelling variation (see section below), such that it can be used whether or
-# not you enable that feature. When adding additional entries to this list,
-# please add the normalized form as well.
-अंदर
-अत
-अपना
-अपनी
-अपने
-अभी
-आदि
-आप
-इत्यादि
-इन
-इनका
-इन्हीं
-इन्हें
-इन्हों
-इस
-इसका
-इसकी
-इसके
-इसमें
-इसी
-इसे
-उन
-उनका
-उनकी
-उनके
-उनको
-उन्हीं
-उन्हें
-उन्हों
-उस
-उसके
-उसी
-उसे
-एक
-एवं
-एस
-ऐसे
-और
-कई
-कर
-करता
-करते
-करना
-करने
-करें
-कहते
-कहा
-का
-काफ़ी
-कि
-कितना
-किन्हें
-किन्हों
-किया
-किर
-किस
-किसी
-किसे
-की
-कुछ
-कुल
-के
-को
-कोई
-कौन
-कौनसा
-गया
-घर
-जब
-जहाँ
-जा
-जितना
-जिन
-जिन्हें
-जिन्हों
-जिस
-जिसे
-जीधर
-जैसा
-जैसे
-जो
-तक
-तब
-तरह
-तिन
-तिन्हें
-तिन्हों
-तिस
-तिसे
-तो
-था
-थी
-थे
-दबारा
-दिया
-दुसरा
-दूसरे
-दो
-द्वारा
-न
-नहीं
-ना
-निहायत
-नीचे
-ने
-पर
-पर
-पहले
-पूरा
-पे
-फिर
-बनी
-बही
-बहुत
-बाद
-बाला
-बिलकुल
-भी
-भीतर
-मगर
-मानो
-मे
-में
-यदि
-यह
-यहाँ
-यही
-या
-यिह
-ये
-रखें
-रहा
-रहे
-ऱ्वासा
-लिए
-लिये
-लेकिन
-व
-वर्ग
-वह
-वह
-वहाँ
-वहीं
-वाले
-वुह
-वे
-वग़ैरह
-संग
-सकता
-सकते
-सबसे
-सभी
-साथ
-साबुत
-साभ
-सारा
-से
-सो
-ही
-हुआ
-हुई
-हुए
-है
-हैं
-हो
-होता
-होती
-होते
-होना
-होने
-# additional normalized forms of the above
-अपनि
-जेसे
-होति
-सभि
-तिंहों
-इंहों
-दवारा
-इसि
-किंहें
-थि
-उंहों
-ओर
-जिंहें
-वहिं
-अभि
-बनि
-हि
-उंहिं
-उंहें
-हें
-वगेरह
-एसे
-रवासा
-कोन
-निचे
-काफि
-उसि
-पुरा
-भितर
-हे
-बहि
-वहां
-कोइ
-यहां
-जिंहों
-तिंहें
-किसि
-कइ
-यहि
-इंहिं
-जिधर
-इंहें
-अदि
-इतयादि
-हुइ
-कोनसा
-इसकि
-दुसरे
-जहां
-अप
-किंहों
-उनकि
-भि
-वरग
-हुअ
-जेसा
-नहिं
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| Hungarian stop word list
-| prepared by Anna Tordai
-
-a
-ahogy
-ahol
-aki
-akik
-akkor
-alatt
-által
-általában
-amely
-amelyek
-amelyekben
-amelyeket
-amelyet
-amelynek
-ami
-amit
-amolyan
-amíg
-amikor
-át
-abban
-ahhoz
-annak
-arra
-arról
-az
-azok
-azon
-azt
-azzal
-azért
-aztán
-azután
-azonban
-bár
-be
-belül
-benne
-cikk
-cikkek
-cikkeket
-csak
-de
-e
-eddig
-egész
-egy
-egyes
-egyetlen
-egyéb
-egyik
-egyre
-ekkor
-el
-elég
-ellen
-elő
-először
-előtt
-első
-én
-éppen
-ebben
-ehhez
-emilyen
-ennek
-erre
-ez
-ezt
-ezek
-ezen
-ezzel
-ezért
-és
-fel
-felé
-hanem
-hiszen
-hogy
-hogyan
-igen
-így
-illetve
-ill.
-ill
-ilyen
-ilyenkor
-ison
-ismét
-itt
-jó
-jól
-jobban
-kell
-kellett
-keresztül
-keressünk
-ki
-kívül
-között
-közül
-legalább
-lehet
-lehetett
-legyen
-lenne
-lenni
-lesz
-lett
-maga
-magát
-majd
-majd
-már
-más
-másik
-meg
-még
-mellett
-mert
-mely
-melyek
-mi
-mit
-míg
-miért
-milyen
-mikor
-minden
-mindent
-mindenki
-mindig
-mint
-mintha
-mivel
-most
-nagy
-nagyobb
-nagyon
-ne
-néha
-nekem
-neki
-nem
-néhány
-nélkül
-nincs
-olyan
-ott
-össze
-ő
-ők
-őket
-pedig
-persze
-rá
-s
-saját
-sem
-semmi
-sok
-sokat
-sokkal
-számára
-szemben
-szerint
-szinte
-talán
-tehát
-teljes
-tovább
-továbbá
-több
-úgy
-ugyanis
-új
-újabb
-újra
-után
-utána
-utolsó
-vagy
-vagyis
-valaki
-valami
-valamint
-való
-vagyok
-van
-vannak
-volt
-voltam
-voltak
-voltunk
-vissza
-vele
-viszont
-volna
+++ /dev/null
-# example set of Armenian stopwords.
-այդ
-այլ
-այն
-այս
-դու
-դուք
-եմ
-են
-ենք
-ես
-եք
-է
-էի
-էին
-էինք
-էիր
-էիք
-էր
-ըստ
-թ
-ի
-ին
-իսկ
-իր
-կամ
-համար
-հետ
-հետո
-մենք
-մեջ
-մի
-ն
-նա
-նաև
-նրա
-նրանք
-որ
-որը
-որոնք
-որպես
-ու
-ում
-պիտի
-վրա
-և
+++ /dev/null
-# from appendix D of: A Study of Stemming Effects on Information
-# Retrieval in Bahasa Indonesia
-ada
-adanya
-adalah
-adapun
-agak
-agaknya
-agar
-akan
-akankah
-akhirnya
-aku
-akulah
-amat
-amatlah
-anda
-andalah
-antar
-diantaranya
-antara
-antaranya
-diantara
-apa
-apaan
-mengapa
-apabila
-apakah
-apalagi
-apatah
-atau
-ataukah
-ataupun
-bagai
-bagaikan
-sebagai
-sebagainya
-bagaimana
-bagaimanapun
-sebagaimana
-bagaimanakah
-bagi
-bahkan
-bahwa
-bahwasanya
-sebaliknya
-banyak
-sebanyak
-beberapa
-seberapa
-begini
-beginian
-beginikah
-beginilah
-sebegini
-begitu
-begitukah
-begitulah
-begitupun
-sebegitu
-belum
-belumlah
-sebelum
-sebelumnya
-sebenarnya
-berapa
-berapakah
-berapalah
-berapapun
-betulkah
-sebetulnya
-biasa
-biasanya
-bila
-bilakah
-bisa
-bisakah
-sebisanya
-boleh
-bolehkah
-bolehlah
-buat
-bukan
-bukankah
-bukanlah
-bukannya
-cuma
-percuma
-dahulu
-dalam
-dan
-dapat
-dari
-daripada
-dekat
-demi
-demikian
-demikianlah
-sedemikian
-dengan
-depan
-di
-dia
-dialah
-dini
-diri
-dirinya
-terdiri
-dong
-dulu
-enggak
-enggaknya
-entah
-entahlah
-terhadap
-terhadapnya
-hal
-hampir
-hanya
-hanyalah
-harus
-haruslah
-harusnya
-seharusnya
-hendak
-hendaklah
-hendaknya
-hingga
-sehingga
-ia
-ialah
-ibarat
-ingin
-inginkah
-inginkan
-ini
-inikah
-inilah
-itu
-itukah
-itulah
-jangan
-jangankan
-janganlah
-jika
-jikalau
-juga
-justru
-kala
-kalau
-kalaulah
-kalaupun
-kalian
-kami
-kamilah
-kamu
-kamulah
-kan
-kapan
-kapankah
-kapanpun
-dikarenakan
-karena
-karenanya
-ke
-kecil
-kemudian
-kenapa
-kepada
-kepadanya
-ketika
-seketika
-khususnya
-kini
-kinilah
-kiranya
-sekiranya
-kita
-kitalah
-kok
-lagi
-lagian
-selagi
-lah
-lain
-lainnya
-melainkan
-selaku
-lalu
-melalui
-terlalu
-lama
-lamanya
-selama
-selama
-selamanya
-lebih
-terlebih
-bermacam
-macam
-semacam
-maka
-makanya
-makin
-malah
-malahan
-mampu
-mampukah
-mana
-manakala
-manalagi
-masih
-masihkah
-semasih
-masing
-mau
-maupun
-semaunya
-memang
-mereka
-merekalah
-meski
-meskipun
-semula
-mungkin
-mungkinkah
-nah
-namun
-nanti
-nantinya
-nyaris
-oleh
-olehnya
-seorang
-seseorang
-pada
-padanya
-padahal
-paling
-sepanjang
-pantas
-sepantasnya
-sepantasnyalah
-para
-pasti
-pastilah
-per
-pernah
-pula
-pun
-merupakan
-rupanya
-serupa
-saat
-saatnya
-sesaat
-saja
-sajalah
-saling
-bersama
-sama
-sesama
-sambil
-sampai
-sana
-sangat
-sangatlah
-saya
-sayalah
-se
-sebab
-sebabnya
-sebuah
-tersebut
-tersebutlah
-sedang
-sedangkan
-sedikit
-sedikitnya
-segala
-segalanya
-segera
-sesegera
-sejak
-sejenak
-sekali
-sekalian
-sekalipun
-sesekali
-sekaligus
-sekarang
-sekarang
-sekitar
-sekitarnya
-sela
-selain
-selalu
-seluruh
-seluruhnya
-semakin
-sementara
-sempat
-semua
-semuanya
-sendiri
-sendirinya
-seolah
-seperti
-sepertinya
-sering
-seringnya
-serta
-siapa
-siapakah
-siapapun
-disini
-disinilah
-sini
-sinilah
-sesuatu
-sesuatunya
-suatu
-sesudah
-sesudahnya
-sudah
-sudahkah
-sudahlah
-supaya
-tadi
-tadinya
-tak
-tanpa
-setelah
-telah
-tentang
-tentu
-tentulah
-tentunya
-tertentu
-seterusnya
-tapi
-tetapi
-setiap
-tiap
-setidaknya
-tidak
-tidakkah
-tidaklah
-toh
-waduh
-wah
-wahai
-sewaktu
-walau
-walaupun
-wong
-yaitu
-yakni
-yang
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | An Italian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-ad | a (to) before vowel
-al | a + il
-allo | a + lo
-ai | a + i
-agli | a + gli
-all | a + l'
-agl | a + gl'
-alla | a + la
-alle | a + le
-con | with
-col | con + il
-coi | con + i (forms collo, cogli etc are now very rare)
-da | from
-dal | da + il
-dallo | da + lo
-dai | da + i
-dagli | da + gli
-dall | da + l'
-dagl | da + gll'
-dalla | da + la
-dalle | da + le
-di | of
-del | di + il
-dello | di + lo
-dei | di + i
-degli | di + gli
-dell | di + l'
-degl | di + gl'
-della | di + la
-delle | di + le
-in | in
-nel | in + el
-nello | in + lo
-nei | in + i
-negli | in + gli
-nell | in + l'
-negl | in + gl'
-nella | in + la
-nelle | in + le
-su | on
-sul | su + il
-sullo | su + lo
-sui | su + i
-sugli | su + gli
-sull | su + l'
-sugl | su + gl'
-sulla | su + la
-sulle | su + le
-per | through, by
-tra | among
-contro | against
-io | I
-tu | thou
-lui | he
-lei | she
-noi | we
-voi | you
-loro | they
-mio | my
-mia |
-miei |
-mie |
-tuo |
-tua |
-tuoi | thy
-tue |
-suo |
-sua |
-suoi | his, her
-sue |
-nostro | our
-nostra |
-nostri |
-nostre |
-vostro | your
-vostra |
-vostri |
-vostre |
-mi | me
-ti | thee
-ci | us, there
-vi | you, there
-lo | him, the
-la | her, the
-li | them
-le | them, the
-gli | to him, the
-ne | from there etc
-il | the
-un | a
-uno | a
-una | a
-ma | but
-ed | and
-se | if
-perché | why, because
-anche | also
-come | how
-dov | where (as dov')
-dove | where
-che | who, that
-chi | who
-cui | whom
-non | not
-più | more
-quale | who, that
-quanto | how much
-quanti |
-quanta |
-quante |
-quello | that
-quelli |
-quella |
-quelle |
-questo | this
-questi |
-questa |
-queste |
-si | yes
-tutto | all
-tutti | all
-
- | single letter forms:
-
-a | at
-c | as c' for ce or ci
-e | and
-i | the
-l | as l'
-o | or
-
- | forms of avere, to have (not including the infinitive):
-
-ho
-hai
-ha
-abbiamo
-avete
-hanno
-abbia
-abbiate
-abbiano
-avrò
-avrai
-avrà
-avremo
-avrete
-avranno
-avrei
-avresti
-avrebbe
-avremmo
-avreste
-avrebbero
-avevo
-avevi
-aveva
-avevamo
-avevate
-avevano
-ebbi
-avesti
-ebbe
-avemmo
-aveste
-ebbero
-avessi
-avesse
-avessimo
-avessero
-avendo
-avuto
-avuta
-avuti
-avute
-
- | forms of essere, to be (not including the infinitive):
-sono
-sei
-è
-siamo
-siete
-sia
-siate
-siano
-sarò
-sarai
-sarà
-saremo
-sarete
-saranno
-sarei
-saresti
-sarebbe
-saremmo
-sareste
-sarebbero
-ero
-eri
-era
-eravamo
-eravate
-erano
-fui
-fosti
-fu
-fummo
-foste
-furono
-fossi
-fosse
-fossimo
-fossero
-essendo
-
- | forms of fare, to do (not including the infinitive, fa, fat-):
-faccio
-fai
-facciamo
-fanno
-faccia
-facciate
-facciano
-farò
-farai
-farà
-faremo
-farete
-faranno
-farei
-faresti
-farebbe
-faremmo
-fareste
-farebbero
-facevo
-facevi
-faceva
-facevamo
-facevate
-facevano
-feci
-facesti
-fece
-facemmo
-faceste
-fecero
-facessi
-facesse
-facessimo
-facessero
-facendo
-
- | forms of stare, to be (not including the infinitive):
-sto
-stai
-sta
-stiamo
-stanno
-stia
-stiate
-stiano
-starò
-starai
-starà
-staremo
-starete
-staranno
-starei
-staresti
-starebbe
-staremmo
-stareste
-starebbero
-stavo
-stavi
-stava
-stavamo
-stavate
-stavano
-stetti
-stesti
-stette
-stemmo
-steste
-stettero
-stessi
-stesse
-stessimo
-stessero
-stando
+++ /dev/null
-#
-# This file defines a stopword set for Japanese.
-#
-# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
-# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
-# for frequency lists, etc. that can be useful for making your own set (if desired)
-#
-# Note that there is an overlap between these stopwords and the terms stopped when used
-# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
-# that comments are not allowed on the same line as stopwords.
-#
-# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
-# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
-# using the same character width as the entries in this file. Since this StopFilter is
-# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
-# entries to be in half-width and your kana entries to be in full-width.
-#
-の
-に
-は
-を
-た
-が
-で
-て
-と
-し
-れ
-さ
-ある
-いる
-も
-する
-から
-な
-こと
-として
-い
-や
-れる
-など
-なっ
-ない
-この
-ため
-その
-あっ
-よう
-また
-もの
-という
-あり
-まで
-られ
-なる
-へ
-か
-だ
-これ
-によって
-により
-おり
-より
-による
-ず
-なり
-られる
-において
-ば
-なかっ
-なく
-しかし
-について
-せ
-だっ
-その後
-できる
-それ
-う
-ので
-なお
-のみ
-でき
-き
-つ
-における
-および
-いう
-さらに
-でも
-ら
-たり
-その他
-に関する
-たち
-ます
-ん
-なら
-に対して
-特に
-せる
-及び
-これら
-とき
-では
-にて
-ほか
-ながら
-うち
-そして
-とともに
-ただし
-かつて
-それぞれ
-または
-お
-ほど
-ものの
-に対する
-ほとんど
-と共に
-といった
-です
-とも
-ところ
-ここ
-##### End of file
+++ /dev/null
-# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
-# the original list of over 800 forms was refined:
-# pronouns, adverbs, interjections were removed
-#
-# prepositions
-aiz
-ap
-ar
-apakš
-ārpus
-augšpus
-bez
-caur
-dēļ
-gar
-iekš
-iz
-kopš
-labad
-lejpus
-līdz
-no
-otrpus
-pa
-par
-pār
-pēc
-pie
-pirms
-pret
-priekš
-starp
-šaipus
-uz
-viņpus
-virs
-virspus
-zem
-apakšpus
-# Conjunctions
-un
-bet
-jo
-ja
-ka
-lai
-tomēr
-tikko
-turpretī
-arī
-kaut
-gan
-tādēļ
-tā
-ne
-tikvien
-vien
-kā
-ir
-te
-vai
-kamēr
-# Particles
-ar
-diezin
-droši
-diemžēl
-nebūt
-ik
-it
-taču
-nu
-pat
-tiklab
-iekšpus
-nedz
-tik
-nevis
-turpretim
-jeb
-iekam
-iekām
-iekāms
-kolīdz
-līdzko
-tiklīdz
-jebšu
-tālab
-tāpēc
-nekā
-itin
-jā
-jau
-jel
-nē
-nezin
-tad
-tikai
-vis
-tak
-iekams
-vien
-# modal verbs
-būt
-biju
-biji
-bija
-bijām
-bijāt
-esmu
-esi
-esam
-esat
-būšu
-būsi
-būs
-būsim
-būsiet
-tikt
-tiku
-tiki
-tika
-tikām
-tikāt
-tieku
-tiec
-tiek
-tiekam
-tiekat
-tikšu
-tiks
-tiksim
-tiksiet
-tapt
-tapi
-tapāt
-topat
-tapšu
-tapsi
-taps
-tapsim
-tapsiet
-kļūt
-kļuvu
-kļuvi
-kļuva
-kļuvām
-kļuvāt
-kļūstu
-kļūsti
-kļūst
-kļūstam
-kļūstat
-kļūšu
-kļūsi
-kļūs
-kļūsim
-kļūsiet
-# verbs
-varēt
-varēju
-varējām
-varēšu
-varēsim
-var
-varēji
-varējāt
-varēsi
-varēsiet
-varat
-varēja
-varēs
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Dutch stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large sample of Dutch text.
-
- | Dutch stop words frequently exhibit homonym clashes. These are indicated
- | clearly below.
-
-de | the
-en | and
-van | of, from
-ik | I, the ego
-te | (1) chez, at etc, (2) to, (3) too
-dat | that, which
-die | that, those, who, which
-in | in, inside
-een | a, an, one
-hij | he
-het | the, it
-niet | not, nothing, naught
-zijn | (1) to be, being, (2) his, one's, its
-is | is
-was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
-op | on, upon, at, in, up, used up
-aan | on, upon, to (as dative)
-met | with, by
-als | like, such as, when
-voor | (1) before, in front of, (2) furrow
-had | had, past tense all persons sing. of 'hebben' (have)
-er | there
-maar | but, only
-om | round, about, for etc
-hem | him
-dan | then
-zou | should/would, past tense all persons sing. of 'zullen'
-of | or, whether, if
-wat | what, something, anything
-mijn | possessive and noun 'mine'
-men | people, 'one'
-dit | this
-zo | so, thus, in this way
-door | through by
-over | over, across
-ze | she, her, they, them
-zich | oneself
-bij | (1) a bee, (2) by, near, at
-ook | also, too
-tot | till, until
-je | you
-mij | me
-uit | out of, from
-der | Old Dutch form of 'van der' still found in surnames
-daar | (1) there, (2) because
-haar | (1) her, their, them, (2) hair
-naar | (1) unpleasant, unwell etc, (2) towards, (3) as
-heb | present first person sing. of 'to have'
-hoe | how, why
-heeft | present third person sing. of 'to have'
-hebben | 'to have' and various parts thereof
-deze | this
-u | you
-want | (1) for, (2) mitten, (3) rigging
-nog | yet, still
-zal | 'shall', first and third person sing. of verb 'zullen' (will)
-me | me
-zij | she, they
-nu | now
-ge | 'thou', still used in Belgium and south Netherlands
-geen | none
-omdat | because
-iets | something, somewhat
-worden | to become, grow, get
-toch | yet, still
-al | all, every, each
-waren | (1) 'were' (2) to wander, (3) wares, (3)
-veel | much, many
-meer | (1) more, (2) lake
-doen | to do, to make
-toen | then, when
-moet | noun 'spot/mote' and present form of 'to must'
-ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
-zonder | without
-kan | noun 'can' and present form of 'to be able'
-hun | their, them
-dus | so, consequently
-alles | all, everything, anything
-onder | under, beneath
-ja | yes, of course
-eens | once, one day
-hier | here
-wie | who
-werd | imperfect third person sing. of 'become'
-altijd | always
-doch | yet, but etc
-wordt | present third person sing. of 'become'
-wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
-kunnen | to be able
-ons | us/our
-zelf | self
-tegen | against, towards, at
-na | after, near
-reeds | already
-wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
-kon | could; past tense of 'to be able'
-niets | nothing
-uw | your
-iemand | somebody
-geweest | been; past participle of 'be'
-andere | other
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Norwegian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This stop word list is for the dominant bokmål dialect. Words unique
- | to nynorsk are marked *.
-
- | Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005
-
-og | and
-i | in
-jeg | I
-det | it/this/that
-at | to (w. inf.)
-en | a/an
-et | a/an
-den | it/this/that
-til | to
-er | is/am/are
-som | who/that
-på | on
-de | they / you(formal)
-med | with
-han | he
-av | of
-ikke | not
-ikkje | not *
-der | there
-så | so
-var | was/were
-meg | me
-seg | you
-men | but
-ett | one
-har | have
-om | about
-vi | we
-min | my
-mitt | my
-ha | have
-hadde | had
-hun | she
-nå | now
-over | over
-da | when/as
-ved | by/know
-fra | from
-du | you
-ut | out
-sin | your
-dem | them
-oss | us
-opp | up
-man | you/one
-kan | can
-hans | his
-hvor | where
-eller | or
-hva | what
-skal | shall/must
-selv | self (reflective)
-sjøl | self (reflective)
-her | here
-alle | all
-vil | will
-bli | become
-ble | became
-blei | became *
-blitt | have become
-kunne | could
-inn | in
-når | when
-være | be
-kom | come
-noen | some
-noe | some
-ville | would
-dere | you
-som | who/which/that
-deres | their/theirs
-kun | only/just
-ja | yes
-etter | after
-ned | down
-skulle | should
-denne | this
-for | for/because
-deg | you
-si | hers/his
-sine | hers/his
-sitt | hers/his
-mot | against
-å | to
-meget | much
-hvorfor | why
-dette | this
-disse | these/those
-uten | without
-hvordan | how
-ingen | none
-din | your
-ditt | your
-blir | become
-samme | same
-hvilken | which
-hvilke | which (plural)
-sånn | such a
-inni | inside/within
-mellom | between
-vår | our
-hver | each
-hvem | who
-vors | us/ours
-hvis | whose
-både | both
-bare | only/just
-enn | than
-fordi | as/because
-før | before
-mange | many
-også | also
-slik | just
-vært | been
-være | to be
-båe | both *
-begge | both
-siden | since
-dykk | your *
-dykkar | yours *
-dei | they *
-deira | them *
-deires | theirs *
-deim | them *
-di | your (fem.) *
-då | as/when *
-eg | I *
-ein | a/an *
-eit | a/an *
-eitt | a/an *
-elles | or *
-honom | he *
-hjå | at *
-ho | she *
-hoe | she *
-henne | her
-hennar | her/hers
-hennes | hers
-hoss | how *
-hossen | how *
-ikkje | not *
-ingi | noone *
-inkje | noone *
-korleis | how *
-korso | how *
-kva | what/which *
-kvar | where *
-kvarhelst | where *
-kven | who/whom *
-kvi | why *
-kvifor | why *
-me | we *
-medan | while *
-mi | my *
-mine | my *
-mykje | much *
-no | now *
-nokon | some (masc./neut.) *
-noka | some (fem.) *
-nokor | some *
-noko | some *
-nokre | some *
-si | his/hers *
-sia | since *
-sidan | since *
-so | so *
-somt | some *
-somme | some *
-um | about*
-upp | up *
-vere | be *
-vore | was *
-verte | become *
-vort | become *
-varte | became *
-vart | became *
-
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Portuguese stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | of, from
-a | the; to, at; her
-o | the; him
-que | who, that
-e | and
-do | de + o
-da | de + a
-em | in
-um | a
-para | for
- | é from SER
-com | with
-não | not, no
-uma | a
-os | the; them
-no | em + o
-se | himself etc
-na | em + a
-por | for
-mais | more
-as | the; them
-dos | de + os
-como | as, like
-mas | but
- | foi from SER
-ao | a + o
-ele | he
-das | de + as
- | tem from TER
-à | a + a
-seu | his
-sua | her
-ou | or
- | ser from SER
-quando | when
-muito | much
- | há from HAV
-nos | em + os; us
-já | already, now
- | está from EST
-eu | I
-também | also
-só | only, just
-pelo | per + o
-pela | per + a
-até | up to
-isso | that
-ela | she
-entre | between
- | era from SER
-depois | after
-sem | without
-mesmo | same
-aos | a + os
- | ter from TER
-seus | his
-quem | whom
-nas | em + as
-me | me
-esse | that
-eles | they
- | estão from EST
-você | you
- | tinha from TER
- | foram from SER
-essa | that
-num | em + um
-nem | nor
-suas | her
-meu | my
-às | a + as
-minha | my
- | têm from TER
-numa | em + uma
-pelos | per + os
-elas | they
- | havia from HAV
- | seja from SER
-qual | which
- | será from SER
-nós | we
- | tenho from TER
-lhe | to him, her
-deles | of them
-essas | those
-esses | those
-pelas | per + as
-este | this
- | fosse from SER
-dele | of him
-
- | other words. There are many contractions such as naquele = em+aquele,
- | mo = me+o, but they are rare.
- | Indefinite article plural forms are also rare.
-
-tu | thou
-te | thee
-vocês | you (plural)
-vos | you
-lhes | to them
-meus | my
-minhas
-teu | thy
-tua
-teus
-tuas
-nosso | our
-nossa
-nossos
-nossas
-
-dela | of her
-delas | of them
-
-esta | this
-estes | these
-estas | these
-aquele | that
-aquela | that
-aqueles | those
-aquelas | those
-isto | this
-aquilo | that
-
- | forms of estar, to be (not including the infinitive):
-estou
-está
-estamos
-estão
-estive
-esteve
-estivemos
-estiveram
-estava
-estávamos
-estavam
-estivera
-estivéramos
-esteja
-estejamos
-estejam
-estivesse
-estivéssemos
-estivessem
-estiver
-estivermos
-estiverem
-
- | forms of haver, to have (not including the infinitive):
-hei
-há
-havemos
-hão
-houve
-houvemos
-houveram
-houvera
-houvéramos
-haja
-hajamos
-hajam
-houvesse
-houvéssemos
-houvessem
-houver
-houvermos
-houverem
-houverei
-houverá
-houveremos
-houverão
-houveria
-houveríamos
-houveriam
-
- | forms of ser, to be (not including the infinitive):
-sou
-somos
-são
-era
-éramos
-eram
-fui
-foi
-fomos
-foram
-fora
-fôramos
-seja
-sejamos
-sejam
-fosse
-fôssemos
-fossem
-for
-formos
-forem
-serei
-será
-seremos
-serão
-seria
-seríamos
-seriam
-
- | forms of ter, to have (not including the infinitive):
-tenho
-tem
-temos
-tém
-tinha
-tínhamos
-tinham
-tive
-teve
-tivemos
-tiveram
-tivera
-tivéramos
-tenha
-tenhamos
-tenham
-tivesse
-tivéssemos
-tivessem
-tiver
-tivermos
-tiverem
-terei
-terá
-teremos
-terão
-teria
-teríamos
-teriam
+++ /dev/null
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-acea
-aceasta
-această
-aceea
-acei
-aceia
-acel
-acela
-acele
-acelea
-acest
-acesta
-aceste
-acestea
-aceşti
-aceştia
-acolo
-acum
-ai
-aia
-aibă
-aici
-al
-ăla
-ale
-alea
-ălea
-altceva
-altcineva
-am
-ar
-are
-aş
-aşadar
-asemenea
-asta
-ăsta
-astăzi
-astea
-ăstea
-ăştia
-asupra
-aţi
-au
-avea
-avem
-aveţi
-azi
-bine
-bucur
-bună
-ca
-că
-căci
-când
-care
-cărei
-căror
-cărui
-cât
-câte
-câţi
-către
-câtva
-ce
-cel
-ceva
-chiar
-cînd
-cine
-cineva
-cît
-cîte
-cîţi
-cîtva
-contra
-cu
-cum
-cumva
-curând
-curînd
-da
-dă
-dacă
-dar
-datorită
-de
-deci
-deja
-deoarece
-departe
-deşi
-din
-dinaintea
-dintr
-dintre
-drept
-după
-ea
-ei
-el
-ele
-eram
-este
-eşti
-eu
-face
-fără
-fi
-fie
-fiecare
-fii
-fim
-fiţi
-iar
-ieri
-îi
-îl
-îmi
-împotriva
-în
-înainte
-înaintea
-încât
-încît
-încotro
-între
-întrucât
-întrucît
-îţi
-la
-lângă
-le
-li
-lîngă
-lor
-lui
-mă
-mâine
-mea
-mei
-mele
-mereu
-meu
-mi
-mine
-mult
-multă
-mulţi
-ne
-nicăieri
-nici
-nimeni
-nişte
-noastră
-noastre
-noi
-noştri
-nostru
-nu
-ori
-oricând
-oricare
-oricât
-orice
-oricînd
-oricine
-oricît
-oricum
-oriunde
-până
-pe
-pentru
-peste
-pînă
-poate
-pot
-prea
-prima
-primul
-prin
-printr
-sa
-să
-săi
-sale
-sau
-său
-se
-şi
-sînt
-sîntem
-sînteţi
-spre
-sub
-sunt
-suntem
-sunteţi
-ta
-tăi
-tale
-tău
-te
-ţi
-ţie
-tine
-toată
-toate
-tot
-toţi
-totuşi
-tu
-un
-una
-unde
-undeva
-unei
-unele
-uneori
-unor
-vă
-vi
-voastră
-voastre
-voi
-voştri
-vostru
-vouă
-vreo
-vreun
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | a russian stop word list. comments begin with vertical bar. each stop
- | word is at the start of a line.
-
- | this is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | letter `ё' is translated to `е'.
-
-и | and
-в | in/into
-во | alternative form
-не | not
-что | what/that
-он | he
-на | on/onto
-я | i
-с | from
-со | alternative form
-как | how
-а | milder form of `no' (but)
-то | conjunction and form of `that'
-все | all
-она | she
-так | so, thus
-его | him
-но | but
-да | yes/and
-ты | thou
-к | towards, by
-у | around, chez
-же | intensifier particle
-вы | you
-за | beyond, behind
-бы | conditional/subj. particle
-по | up to, along
-только | only
-ее | her
-мне | to me
-было | it was
-вот | here is/are, particle
-от | away from
-меня | me
-еще | still, yet, more
-нет | no, there isnt/arent
-о | about
-из | out of
-ему | to him
-теперь | now
-когда | when
-даже | even
-ну | so, well
-вдруг | suddenly
-ли | interrogative particle
-если | if
-уже | already, but homonym of `narrower'
-или | or
-ни | neither
-быть | to be
-был | he was
-него | prepositional form of его
-до | up to
-вас | you accusative
-нибудь | indef. suffix preceded by hyphen
-опять | again
-уж | already, but homonym of `adder'
-вам | to you
-сказал | he said
-ведь | particle `after all'
-там | there
-потом | then
-себя | oneself
-ничего | nothing
-ей | to her
-может | usually with `быть' as `maybe'
-они | they
-тут | here
-где | where
-есть | there is/are
-надо | got to, must
-ней | prepositional form of ей
-для | for
-мы | we
-тебя | thee
-их | them, their
-чем | than
-была | she was
-сам | self
-чтоб | in order to
-без | without
-будто | as if
-человек | man, person, one
-чего | genitive form of `what'
-раз | once
-тоже | also
-себе | to oneself
-под | beneath
-жизнь | life
-будет | will be
-ж | short form of intensifier particle `же'
-тогда | then
-кто | who
-этот | this
-говорил | was saying
-того | genitive form of `that'
-потому | for that reason
-этого | genitive form of `this'
-какой | which
-совсем | altogether
-ним | prepositional form of `его', `они'
-здесь | here
-этом | prepositional form of `этот'
-один | one
-почти | almost
-мой | my
-тем | instrumental/dative plural of `тот', `то'
-чтобы | full form of `in order that'
-нее | her (acc.)
-кажется | it seems
-сейчас | now
-были | they were
-куда | where to
-зачем | why
-сказать | to say
-всех | all (acc., gen. preposn. plural)
-никогда | never
-сегодня | today
-можно | possible, one can
-при | by
-наконец | finally
-два | two
-об | alternative form of `о', about
-другой | another
-хоть | even
-после | after
-над | above
-больше | more
-тот | that one (masc.)
-через | across, in
-эти | these
-нас | us
-про | about
-всего | in all, only, of all
-них | prepositional form of `они' (they)
-какая | which, feminine
-много | lots
-разве | interrogative particle
-сказала | she said
-три | three
-эту | this, acc. fem. sing.
-моя | my, feminine
-впрочем | moreover, besides
-хорошо | good
-свою | ones own, acc. fem. sing.
-этой | oblique form of `эта', fem. `this'
-перед | in front of
-иногда | sometimes
-лучше | better
-чуть | a little
-том | preposn. form of `that one'
-нельзя | one must not
-такой | such a one
-им | to them
-более | more
-всегда | always
-конечно | of course
-всю | acc. fem. sing of `all'
-между | between
-
-
- | b: some paradigms
- |
- | personal pronouns
- |
- | я меня мне мной [мною]
- | ты тебя тебе тобой [тобою]
- | он его ему им [него, нему, ним]
- | она ее ей ею [нее, ней, нею]
- | оно его ему им [него, нему, ним]
- |
- | мы нас нам нами
- | вы вас вам вами
- | они их им ими [них, ним, ними]
- |
- | себя себе собой [собою]
- |
- | demonstrative pronouns: этот (this), тот (that)
- |
- | этот эта это эти
- | этого эту это эти
- | этого этой этого этих
- | этому этой этому этим
- | этим этой этим [этою] этими
- | этом этой этом этих
- |
- | тот та то те
- | того ту то те
- | того той того тех
- | тому той тому тем
- | тем той тем [тою] теми
- | том той том тех
- |
- | determinative pronouns
- |
- | (a) весь (all)
- |
- | весь вся все все
- | всего всю все все
- | всего всей всего всех
- | всему всей всему всем
- | всем всей всем [всею] всеми
- | всем всей всем всех
- |
- | (b) сам (himself etc)
- |
- | сам сама само сами
- | самого саму само самих
- | самого самой самого самих
- | самому самой самому самим
- | самим самой самим [самою] самими
- | самом самой самом самих
- |
- | stems of verbs `to be', `to have', `to do' and modal
- |
- | быть бы буд быв есть суть
- | име
- | дел
- | мог мож мочь
- | уме
- | хоч хот
- | долж
- | можн
- | нужн
- | нельзя
-
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Swedish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | Swedish stop words occasionally exhibit homonym clashes. For example
- | så = so, but also seed. These are indicated clearly below.
-
-och | and
-det | it, this/that
-att | to (with infinitive)
-i | in, at
-en | a
-jag | I
-hon | she
-som | who, that
-han | he
-på | on
-den | it, this/that
-med | with
-var | where, each
-sig | him(self) etc
-för | for
-så | so (also: seed)
-till | to
-är | is
-men | but
-ett | a
-om | if; around, about
-hade | had
-de | they, these/those
-av | of
-icke | not, no
-mig | me
-du | you
-henne | her
-då | then, when
-sin | his
-nu | now
-har | have
-inte | inte någon = no one
-hans | his
-honom | him
-skulle | 'sake'
-hennes | her
-där | there
-min | my
-man | one (pronoun)
-ej | nor
-vid | at, by, on (also: vast)
-kunde | could
-något | some etc
-från | from, off
-ut | out
-när | when
-efter | after, behind
-upp | up
-vi | we
-dem | them
-vara | be
-vad | what
-över | over
-än | than
-dig | you
-kan | can
-sina | his
-här | here
-ha | have
-mot | towards
-alla | all
-under | under (also: wonder)
-någon | some etc
-eller | or (else)
-allt | all
-mycket | much
-sedan | since
-ju | why
-denna | this/that
-själv | myself, yourself etc
-detta | this/that
-åt | to
-utan | without
-varit | was
-hur | how
-ingen | no
-mitt | my
-ni | you
-bli | to be, become
-blev | from bli
-oss | us
-din | thy
-dessa | these/those
-några | some etc
-deras | their
-blir | from bli
-mina | my
-samma | (the) same
-vilken | who, that
-er | you, your
-sådan | such a
-vår | our
-blivit | from bli
-dess | its
-inom | within
-mellan | between
-sådant | such a
-varför | why
-varje | each
-vilka | who, that
-ditt | thy
-vem | who
-vilket | who, that
-sitta | his
-sådana | such a
-vart | each
-dina | thy
-vars | whose
-vårt | our
-våra | our
-ert | your
-era | your
-vilkas | whose
-
+++ /dev/null
-# Thai stopwords from:
-# "Opinion Detection in Thai Political News Columns
-# Based on Subjectivity Analysis"
-# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
-ไว้
-ไม่
-ไป
-ได้
-ให้
-ใน
-โดย
-แห่ง
-แล้ว
-และ
-แรก
-แบบ
-แต่
-เอง
-เห็น
-เลย
-เริ่ม
-เรา
-เมื่อ
-เพื่อ
-เพราะ
-เป็นการ
-เป็น
-เปิดเผย
-เปิด
-เนื่องจาก
-เดียวกัน
-เดียว
-เช่น
-เฉพาะ
-เคย
-เข้า
-เขา
-อีก
-อาจ
-อะไร
-ออก
-อย่าง
-อยู่
-อยาก
-หาก
-หลาย
-หลังจาก
-หลัง
-หรือ
-หนึ่ง
-ส่วน
-ส่ง
-สุด
-สําหรับ
-ว่า
-วัน
-ลง
-ร่วม
-ราย
-รับ
-ระหว่าง
-รวม
-ยัง
-มี
-มาก
-มา
-พร้อม
-พบ
-ผ่าน
-ผล
-บาง
-น่า
-นี้
-นํา
-นั้น
-นัก
-นอกจาก
-ทุก
-ที่สุด
-ที่
-ทําให้
-ทํา
-ทาง
-ทั้งนี้
-ทั้ง
-ถ้า
-ถูก
-ถึง
-ต้อง
-ต่างๆ
-ต่าง
-ต่อ
-ตาม
-ตั้งแต่
-ตั้ง
-ด้าน
-ด้วย
-ดัง
-ซึ่ง
-ช่วง
-จึง
-จาก
-จัด
-จะ
-คือ
-ความ
-ครั้ง
-คง
-ขึ้น
-ของ
-ขอ
-ขณะ
-ก่อน
-ก็
-การ
-กับ
-กัน
-กว่า
-กล่าว
+++ /dev/null
-# Turkish stopwords from LUCENE-559
-# merged with the list from "Information Retrieval on Turkish Texts"
-# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
-acaba
-altmış
-altı
-ama
-ancak
-arada
-aslında
-ayrıca
-bana
-bazı
-belki
-ben
-benden
-beni
-benim
-beri
-beş
-bile
-bin
-bir
-birçok
-biri
-birkaç
-birkez
-birşey
-birşeyi
-biz
-bize
-bizden
-bizi
-bizim
-böyle
-böylece
-bu
-buna
-bunda
-bundan
-bunlar
-bunları
-bunların
-bunu
-bunun
-burada
-çok
-çünkü
-da
-daha
-dahi
-de
-defa
-değil
-diğer
-diye
-doksan
-dokuz
-dolayı
-dolayısıyla
-dört
-edecek
-eden
-ederek
-edilecek
-ediliyor
-edilmesi
-ediyor
-eğer
-elli
-en
-etmesi
-etti
-ettiği
-ettiğini
-gibi
-göre
-halen
-hangi
-hatta
-hem
-henüz
-hep
-hepsi
-her
-herhangi
-herkesin
-hiç
-hiçbir
-için
-iki
-ile
-ilgili
-ise
-işte
-itibaren
-itibariyle
-kadar
-karşın
-katrilyon
-kendi
-kendilerine
-kendini
-kendisi
-kendisine
-kendisini
-kez
-ki
-kim
-kimden
-kime
-kimi
-kimse
-kırk
-milyar
-milyon
-mu
-mü
-mı
-nasıl
-ne
-neden
-nedenle
-nerde
-nerede
-nereye
-niye
-niçin
-o
-olan
-olarak
-oldu
-olduğu
-olduğunu
-olduklarını
-olmadı
-olmadığı
-olmak
-olması
-olmayan
-olmaz
-olsa
-olsun
-olup
-olur
-olursa
-oluyor
-on
-ona
-ondan
-onlar
-onlardan
-onları
-onların
-onu
-onun
-otuz
-oysa
-öyle
-pek
-rağmen
-sadece
-sanki
-sekiz
-seksen
-sen
-senden
-seni
-senin
-siz
-sizden
-sizi
-sizin
-şey
-şeyden
-şeyi
-şeyler
-şöyle
-şu
-şuna
-şunda
-şundan
-şunları
-şunu
-tarafından
-trilyon
-tüm
-üç
-üzere
-var
-vardı
-ve
-veya
-ya
-yani
-yapacak
-yapılan
-yapılması
-yapıyor
-yapmak
-yaptı
-yaptığı
-yaptığını
-yaptıkları
-yedi
-yerine
-yetmiş
-yine
-yirmi
-yoksa
-yüz
-zaten
+++ /dev/null
-#
-# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
-#
-# Add entries to this file in order to override the statistical model in terms
-# of segmentation, readings and part-of-speech tags. Notice that entries do
-# not have weights since they are always used when found. This is by-design
-# in order to maximize ease-of-use.
-#
-# Entries are defined using the following CSV format:
-# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
-#
-# Notice that a single half-width space separates tokens and readings, and
-# that the number of tokens and readings must match exactly.
-#
-# Also notice that the behavior of multiple entries with the same <text> is undefined.
-#
-# Whitespace only lines are ignored. Comments are not allowed on entry lines.
-#
-
-# Custom segmentation for kanji compounds
-日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
-関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
-
-# Custom segmentation for compound katakana
-トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
-ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
-
-# Custom reading for former sumo wrestler
-朝青龍,朝青龍,アサショウリュウ,カスタム人名
+++ /dev/null
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-# Use a protected word file to protect against the stemmer reducing two
-# unrelated words to the same base word.
-
-# Some non-words that normally won't be encountered,
-# just to test that they won't be stemmed.
-dontstems
-zwhacky
-
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!--
- This is the Solr schema file. This file should be named "schema.xml" and
- should be in the conf directory under the solr home
- (i.e. ./solr/conf/schema.xml by default)
- or located where the classloader for the Solr webapp can find it.
-
- This example schema is the recommended starting point for users.
- It should be kept correct and concise, usable out-of-the-box.
-
- For more information, on how to customize this file, please see
- http://wiki.apache.org/solr/SchemaXml
-
- PERFORMANCE NOTE: this schema includes many optional features and should not
- be used for benchmarking. To improve performance one could
- - set stored="false" for all fields possible (esp large fields) when you
- only need to search on the field but don't need to return the original
- value.
- - set indexed="false" if you don't need to search on the field, but only
- return the field as a result of searching on other indexed fields.
- - remove all unneeded copyField statements
- - for best index size and searching performance, set indexed="false"
- for all general text fields, use copyField to copy them to the
- catchall "text" field, and use that for searching.
- - For maximum indexing performance, use the StreamingUpdateSolrServer
- java client.
- - Remember to run the JVM in server mode, and use a higher logging level
- that avoids logging every request
--->
-
-<schema name="example-schemaless" version="1.5">
- <!-- attribute "name" is the name of this schema and is only used for display purposes.
- version="x.y" is Solr's version number for the schema syntax and
- semantics. It should not normally be changed by applications.
-
- 1.0: multiValued attribute did not exist, all fields are multiValued
- by nature
- 1.1: multiValued attribute introduced, false by default
- 1.2: omitTermFreqAndPositions attribute introduced, true by default
- except for text fields.
- 1.3: removed optional field compress feature
- 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
- behavior when a single string produces multiple tokens. Defaults
- to off for version >= 1.4
- 1.5: omitNorms defaults to true for primitive field types
- (int, float, boolean, string...)
- -->
-
- <fields>
- <!-- Valid attributes for fields:
- name: mandatory - the name for the field
- type: mandatory - the name of a field type from the
- <types> fieldType section
- indexed: true if this field should be indexed (searchable or sortable)
- stored: true if this field should be retrievable
- docValues: true if this field should have doc values. Doc values are
- useful for faceting, grouping, sorting and function queries. Although not
- required, doc values will make the index faster to load, more
- NRT-friendly and more memory-efficient. They however come with some
- limitations: they are currently only supported by StrField, UUIDField
- and all Trie*Fields, and depending on the field type, they might
- require the field to be single-valued, be required or have a default
- value (check the documentation of the field type you're interested in
- for more information)
- multiValued: true if this field may contain multiple values per document
- omitNorms: (expert) set to true to omit the norms associated with
- this field (this disables length normalization and index-time
- boosting for the field, and saves some memory). Only full-text
- fields or fields that need an index-time boost need norms.
- Norms are omitted for primitive (non-analyzed) types by default.
- termVectors: [false] set to true to store the term vector for a
- given field.
- When using MoreLikeThis, fields used for similarity should be
- stored for best performance.
- termPositions: Store position information with the term vector.
- This will increase storage costs.
- termOffsets: Store offset information with the term vector. This
- will increase storage costs.
- required: The field is required. It will throw an error if the
- value does not exist
- default: a value that should be used if no value is specified
- when adding a document.
- -->
-
- <!-- field names should consist of alphanumeric or underscore characters only and
- not start with a digit. This is not currently strictly enforced,
- but other field names will not have first class support from all components
- and back compatibility is not guaranteed. Names with both leading and
- trailing underscores (e.g. _version_) are reserved.
- -->
-
- <!-- In this "schemaless" example, only two fields are pre-declared: id and _version_.
- All other fields will be type guessed and added via the
- "add-unknown-fields-to-the-schema" update request processor chain declared
- in solrconfig.xml.
- -->
- <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
- <field name="_version_" type="long" indexed="true" stored="true"/>
-
-
- <!-- Dynamic field definitions allow using convention over configuration
- for fields via the specification of patterns to match field names.
- EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
- RESTRICTION: the glob-like pattern in the name attribute must have
- a "*" only at the start or the end. -->
-
- <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
- <dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/>
- <dynamicField name="*_s" type="string" indexed="true" stored="true" />
- <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
- <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
- <dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/>
- <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
- <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
- <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/>
- <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
- <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
- <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
- <dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/>
- <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
- <dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/>
-
- <!-- Type used to index the lat and lon components for the "location" FieldType -->
- <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
-
- <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
- <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
- <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
-
- <!-- some trie-coded dynamic fields for faster range queries -->
- <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
- <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
- <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
- <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
- <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
-
- <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
- <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
-
- <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
- <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
-
- <dynamicField name="random_*" type="random" />
-
- <!-- uncomment the following to ignore any fields that don't already match an existing
- field name or dynamic field, rather than reporting them as an error.
- alternately, change the type="ignored" to some other type e.g. "text" if you want
- unknown fields indexed and/or stored by default
-
- NB: use of "*" dynamic fields will disable field type guessing and adding
- unknown fields to the schema. -->
- <!--dynamicField name="*" type="ignored" multiValued="true" /-->
-
- </fields>
-
-
- <!-- Field to use to determine and enforce document uniqueness.
- Unless this field is marked with required="false", it will be a required field
- -->
- <uniqueKey>id</uniqueKey>
-
- <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
- parsing a query string that isn't explicit about the field. Machine (non-user)
- generated queries are best made explicit, or they can use the "df" request parameter
- which takes precedence over this.
- Note: Un-commenting defaultSearchField will be insufficient if your request handler
- in solrconfig.xml defines "df", which takes precedence. That would need to be removed.
- <defaultSearchField>text</defaultSearchField> -->
-
- <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers
- when parsing a query string to determine if a clause of the query should be marked as
- required or optional, assuming the clause isn't already marked by some operator.
- The default is OR, which is generally assumed so it is not a good idea to change it
- globally here. The "q.op" request parameter takes precedence over this.
- <solrQueryParser defaultOperator="OR"/> -->
-
- <!-- copyField commands copy one field to another at the time a document
- is added to the index. It's used either to index the same field differently,
- or to add multiple fields to the same field for easier/faster searching.
-
- <copyField source="cat" dest="text"/>
- <copyField source="name" dest="text"/>
- <copyField source="manu" dest="text"/>
- <copyField source="features" dest="text"/>
- <copyField source="includes" dest="text"/>
- <copyField source="manu" dest="manu_exact"/>
- -->
-
- <!-- Copy the price into a currency enabled field (default USD)
- <copyField source="price" dest="price_c"/>
- -->
-
- <!-- Text fields from SolrCell to search by default in our catch-all field
- <copyField source="title" dest="text"/>
- <copyField source="author" dest="text"/>
- <copyField source="description" dest="text"/>
- <copyField source="keywords" dest="text"/>
- <copyField source="content" dest="text"/>
- <copyField source="content_type" dest="text"/>
- <copyField source="resourcename" dest="text"/>
- <copyField source="url" dest="text"/>
- -->
-
- <!-- Create a string version of author for faceting
- <copyField source="author" dest="author_s"/>
- -->
-
- <!-- Above, multiple source fields are copied to the [text] field.
- Another way to map multiple source fields to the same
- destination field is to use the dynamic field syntax.
- copyField also supports a maxChars to copy setting. -->
-
- <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
-
- <!-- copy name to alphaNameSort, a field designed for sorting by name -->
- <!-- <copyField source="name" dest="alphaNameSort"/> -->
-
- <types>
- <!-- field type definitions. The "name" attribute is
- just a label to be used by field definitions. The "class"
- attribute and any other attributes determine the real
- behavior of the fieldType.
- Class names starting with "solr" refer to java classes in a
- standard package such as org.apache.solr.analysis
- -->
-
- <!-- The StrField type is not analyzed, but indexed/stored verbatim.
- It supports doc values but in that case the field needs to be
- single-valued and either required or have a default value.
- -->
- <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
-
- <!-- boolean type: "true" or "false" -->
- <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
-
- <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
-
- <!-- sortMissingLast and sortMissingFirst are optional attributes that are
- currently supported on types that are sorted internally as strings
- and on numeric types.
- This includes "string","boolean", and, as of 3.5 (and 4.x),
- int, float, long, date, double, including the "Trie" variants.
- - If sortMissingLast="true", then a sort on this field will cause documents
- without the field to come after documents with the field,
- regardless of the requested sort order (asc or desc).
- - If sortMissingFirst="true", then a sort on this field will cause documents
- without the field to come before documents with the field,
- regardless of the requested sort order.
- - If sortMissingLast="false" and sortMissingFirst="false" (the default),
- then default lucene sorting will be used which places docs without the
- field first in an ascending sort and last in a descending sort.
- -->
-
- <!--
- Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
-
- These fields support doc values, but they require the field to be
- single-valued and either be required or have a default value.
- -->
- <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
- <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
- <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
- <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
-
- <!--
- Numeric field types that index each value at various levels of precision
- to accelerate range queries when the number of values between the range
- endpoints is large. See the javadoc for NumericRangeQuery for internal
- implementation details.
-
- Smaller precisionStep values (specified in bits) will lead to more tokens
- indexed per value, slightly larger index size, and faster range queries.
- A precisionStep of 0 disables indexing at different precision levels.
- -->
- <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
- <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
- <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
- <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
-
- <fieldType name="tints" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
- <fieldType name="tfloats" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
- <fieldType name="tlongs" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
- <fieldType name="tdoubles" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
-
- <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
- is a more restricted form of the canonical representation of dateTime
- http://www.w3.org/TR/xmlschema-2/#dateTime
- The trailing "Z" designates UTC time and is mandatory.
- Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
- All other components are mandatory.
-
- Expressions can also be used to denote calculations that should be
- performed relative to "NOW" to determine the value, ie...
-
- NOW/HOUR
- ... Round to the start of the current hour
- NOW-1DAY
- ... Exactly 1 day prior to now
- NOW/DAY+6MONTHS+3DAYS
- ... 6 months and 3 days in the future from the start of
- the current day
-
- Consult the DateField javadocs for more information.
-
- Note: For faster range queries, consider the tdate type
- -->
- <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
-
- <!-- A Trie based date field for faster date range queries and date faceting. -->
- <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
-
- <fieldType name="tdates" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0" multiValued="true"/>
-
-
- <!--Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
- <fieldtype name="binary" class="solr.BinaryField"/>
-
- <!--
- Note:
- These should only be used for compatibility with existing indexes (created with lucene or older Solr versions).
- Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last
-
- Plain numeric field types that store and index the text
- value verbatim (and hence don't correctly support range queries, since the
- lexicographic ordering isn't equal to the numeric ordering)
- -->
- <fieldType name="pint" class="solr.IntField"/>
- <fieldType name="plong" class="solr.LongField"/>
- <fieldType name="pfloat" class="solr.FloatField"/>
- <fieldType name="pdouble" class="solr.DoubleField"/>
- <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
-
- <!-- The "RandomSortField" is not used to store or search any
- data. You can declare fields of this type in your schema
- to generate pseudo-random orderings of your docs for sorting
- or function purposes. The ordering is generated based on the field
- name and the version of the index. As long as the index version
- remains unchanged, and the same field name is reused,
- the ordering of the docs will be consistent.
- If you want different pseudo-random orderings of documents,
- for the same version of the index, use a dynamicField and
- change the field name in the request.
- -->
- <fieldType name="random" class="solr.RandomSortField" indexed="true" />
-
- <!-- solr.TextField allows the specification of custom text analyzers
- specified as a tokenizer and a list of token filters. Different
- analyzers may be specified for indexing and querying.
-
- The optional positionIncrementGap puts space between multiple fields of
- this type on the same document, with the purpose of preventing false phrase
- matching across fields.
-
- For more info on customizing your analyzer chain, please see
- http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
- -->
-
- <!-- One can also specify an existing Analyzer class that has a
- default constructor via the class attribute on the analyzer element.
- Example:
- <fieldType name="text_greek" class="solr.TextField">
- <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
- </fieldType>
- -->
-
- <!-- A text field that only splits on whitespace for exact matching of words -->
- <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- A general text field that has reasonable, generic
- cross-language defaults: it tokenizes with StandardTokenizer,
- removes stop words from case-insensitive "stopwords.txt"
- (empty by default), and down cases. At query time only, it
- also applies synonyms. -->
- <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
- <analyzer type="index">
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
- <!-- in this example, we will only use synonyms at query time
- <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
- -->
- <filter class="solr.LowerCaseFilterFactory"/>
- </analyzer>
- <analyzer type="query">
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- A text field with defaults appropriate for English: it
- tokenizes with StandardTokenizer, removes English stop words
- (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
- finally applies Porter's stemming. The query time analyzer
- also applies synonyms from synonyms.txt. -->
- <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
- <analyzer type="index">
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <!-- in this example, we will only use synonyms at query time
- <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
- -->
- <!-- Case insensitive stop word removal.
- -->
- <filter class="solr.StopFilterFactory"
- ignoreCase="true"
- words="lang/stopwords_en.txt"
- />
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.EnglishPossessiveFilterFactory"/>
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
- <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
- <filter class="solr.EnglishMinimalStemFilterFactory"/>
- -->
- <filter class="solr.PorterStemFilterFactory"/>
- </analyzer>
- <analyzer type="query">
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
- <filter class="solr.StopFilterFactory"
- ignoreCase="true"
- words="lang/stopwords_en.txt"
- />
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.EnglishPossessiveFilterFactory"/>
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
- <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
- <filter class="solr.EnglishMinimalStemFilterFactory"/>
- -->
- <filter class="solr.PorterStemFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- A text field with defaults appropriate for English, plus
- aggressive word-splitting and autophrase features enabled.
- This field is just like text_en, except it adds
- WordDelimiterFilter to enable splitting and matching of
- words on case-change, alpha numeric boundaries, and
- non-alphanumeric chars. This means certain compound word
- cases will work, for example query "wi fi" will match
- document "WiFi" or "wi-fi".
- -->
- <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
- <analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <!-- in this example, we will only use synonyms at query time
- <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
- -->
- <!-- Case insensitive stop word removal.
- -->
- <filter class="solr.StopFilterFactory"
- ignoreCase="true"
- words="lang/stopwords_en.txt"
- />
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
- <filter class="solr.PorterStemFilterFactory"/>
- </analyzer>
- <analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
- <filter class="solr.StopFilterFactory"
- ignoreCase="true"
- words="lang/stopwords_en.txt"
- />
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
- <filter class="solr.PorterStemFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
- but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
- <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
- <analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
- <filter class="solr.EnglishMinimalStemFilterFactory"/>
- <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
- possible with WordDelimiterFilter in conjunction with stemming. -->
- <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- Just like text_general except it reverses the characters of
- each token, to enable more efficient leading wildcard queries. -->
- <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
- <analyzer type="index">
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
- maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
- </analyzer>
- <analyzer type="query">
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
- <filter class="solr.LowerCaseFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- charFilter + WhitespaceTokenizer -->
- <!--
- <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
- <analyzer>
- <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- </analyzer>
- </fieldType>
- -->
-
- <!-- This is an example of using the KeywordTokenizer along
- With various TokenFilterFactories to produce a sortable field
- that does not include some properties of the source text
- -->
- <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
- <analyzer>
- <!-- KeywordTokenizer does no actual tokenizing, so the entire
- input string is preserved as a single token
- -->
- <tokenizer class="solr.KeywordTokenizerFactory"/>
- <!-- The LowerCase TokenFilter does what you expect, which can be
- useful when you want your sorting to be case insensitive
- -->
- <filter class="solr.LowerCaseFilterFactory" />
- <!-- The TrimFilter removes any leading or trailing whitespace -->
- <filter class="solr.TrimFilterFactory" />
- <!-- The PatternReplaceFilter gives you the flexibility to use
- Java Regular expression to replace any sequence of characters
- matching a pattern with an arbitrary replacement string,
- which may include back references to portions of the original
- string matched by the pattern.
-
- See the Java Regular Expression documentation for more
- information on pattern and replacement string syntax.
-
- http://docs.oracle.com/javase/7/docs/api/java/util/regex/package-summary.html
- -->
- <filter class="solr.PatternReplaceFilterFactory"
- pattern="([^a-z])" replacement="" replace="all"
- />
- </analyzer>
- </fieldType>
-
- <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
- </analyzer>
- </fieldtype>
-
- <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
- <analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <!--
- The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
- a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
- Attributes of the DelimitedPayloadTokenFilterFactory :
- "delimiter" - a one character delimiter. Default is | (pipe)
- "encoder" - how to encode the following value into a payload
- float -> org.apache.lucene.analysis.payloads.FloatEncoder,
- integer -> o.a.l.a.p.IntegerEncoder
- identity -> o.a.l.a.p.IdentityEncoder
- Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
- -->
- <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
- </analyzer>
- </fieldtype>
-
- <!-- lowercases the entire field value, keeping it as a single token. -->
- <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.KeywordTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory" />
- </analyzer>
- </fieldType>
-
- <!--
- Example of using PathHierarchyTokenizerFactory at index time, so
- queries for paths match documents at that path, or in descendent paths
- -->
- <fieldType name="descendent_path" class="solr.TextField">
- <analyzer type="index">
- <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
- </analyzer>
- <analyzer type="query">
- <tokenizer class="solr.KeywordTokenizerFactory" />
- </analyzer>
- </fieldType>
- <!--
- Example of using PathHierarchyTokenizerFactory at query time, so
- queries for paths match documents at that path, or in ancestor paths
- -->
- <fieldType name="ancestor_path" class="solr.TextField">
- <analyzer type="index">
- <tokenizer class="solr.KeywordTokenizerFactory" />
- </analyzer>
- <analyzer type="query">
- <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
- </analyzer>
- </fieldType>
-
- <!-- since fields of this type are by default not stored or indexed,
- any data added to them will be ignored outright. -->
- <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
-
- <!-- This point type indexes the coordinates as separate fields (subFields)
- If subFieldType is defined, it references a type, and a dynamic field
- definition is created matching *___<typename>. Alternately, if
- subFieldSuffix is defined, that is used to create the subFields.
- Example: if subFieldType="double", then the coordinates would be
- indexed in fields myloc_0___double,myloc_1___double.
- Example: if subFieldSuffix="_d" then the coordinates would be indexed
- in fields myloc_0_d,myloc_1_d
- The subFields are an implementation detail of the fieldType, and end
- users normally should not need to know about them.
- -->
- <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
-
- <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
- <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
-
- <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
- For more information about this and other Spatial fields new to Solr 4, see:
- http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
- -->
- <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
- geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
-
- <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
- Parameters:
- defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
- precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
- providerClass: Lets you plug in other exchange provider backend:
- solr.FileExchangeRateProvider is the default and takes one parameter:
- currencyConfig: name of an xml file holding exchange rates
- solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
- ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
- refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
- -->
- <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
-
-
-
- <!-- some examples for different languages (generally ordered by ISO code) -->
-
- <!-- Arabic -->
- <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <!-- for any non-arabic -->
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" />
- <!-- normalizes ﻯ to ﻱ, etc -->
- <filter class="solr.ArabicNormalizationFilterFactory"/>
- <filter class="solr.ArabicStemFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- Bulgarian -->
- <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" />
- <filter class="solr.BulgarianStemFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- Catalan -->
- <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <!-- removes l', etc -->
- <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" />
- <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
- </analyzer>
- </fieldType>
-
- <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
- <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
- <filter class="solr.CJKWidthFilterFactory"/>
- <!-- for any non-CJK -->
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.CJKBigramFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- Czech -->
- <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" />
- <filter class="solr.CzechStemFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- Danish -->
- <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
- <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
- </analyzer>
- </fieldType>
-
- <!-- German -->
- <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
- <filter class="solr.GermanNormalizationFilterFactory"/>
- <filter class="solr.GermanLightStemFilterFactory"/>
- <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
- <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
- </analyzer>
- </fieldType>
-
- <!-- Greek -->
- <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <!-- greek specific lowercase for sigma -->
- <filter class="solr.GreekLowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" />
- <filter class="solr.GreekStemFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- Spanish -->
- <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
- <filter class="solr.SpanishLightStemFilterFactory"/>
- <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
- </analyzer>
- </fieldType>
-
- <!-- Basque -->
- <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" />
- <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
- </analyzer>
- </fieldType>
-
- <!-- Persian -->
- <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <!-- for ZWNJ -->
- <charFilter class="solr.PersianCharFilterFactory"/>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.ArabicNormalizationFilterFactory"/>
- <filter class="solr.PersianNormalizationFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" />
- </analyzer>
- </fieldType>
-
- <!-- Finnish -->
- <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
- <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
- <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
- </analyzer>
- </fieldType>
-
- <!-- French -->
- <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <!-- removes l', etc -->
- <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
- <filter class="solr.FrenchLightStemFilterFactory"/>
- <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
- <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
- </analyzer>
- </fieldType>
-
- <!-- Irish -->
- <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <!-- removes d', etc -->
- <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
- <!-- removes n-, etc. position increments is intentionally false! -->
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
- <filter class="solr.IrishLowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/>
- <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
- </analyzer>
- </fieldType>
-
- <!-- Galician -->
- <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" />
- <filter class="solr.GalicianStemFilterFactory"/>
- <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
- </analyzer>
- </fieldType>
-
- <!-- Hindi -->
- <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <!-- normalizes unicode representation -->
- <filter class="solr.IndicNormalizationFilterFactory"/>
- <!-- normalizes variation in spelling -->
- <filter class="solr.HindiNormalizationFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" />
- <filter class="solr.HindiStemFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- Hungarian -->
- <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
- <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
- <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->
- </analyzer>
- </fieldType>
-
- <!-- Armenian -->
- <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" />
- <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
- </analyzer>
- </fieldType>
-
- <!-- Indonesian -->
- <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" />
- <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
- <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
- </analyzer>
- </fieldType>
-
- <!-- Italian -->
- <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <!-- removes l', etc -->
- <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
- <filter class="solr.ItalianLightStemFilterFactory"/>
- <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
- </analyzer>
- </fieldType>
-
- <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
-
- NOTE: If you want to optimize search for precision, use default operator AND in your query
- parser config with <solrQueryParser defaultOperator="AND"/> further down in this file. Use
- OR if you would like to optimize for recall (default).
- -->
- <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
- <analyzer>
- <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
-
- Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic
- is used to segment compounds into their parts and the compound itself is kept as synonym.
-
- Valid values for attribute mode are:
- normal: regular segmentation
- search: segmentation useful for search with synonyms compounds (default)
- extended: same as search mode, but unigrams unknown words (experimental)
-
- For some applications it might be good to use search mode for indexing and normal mode for
- queries to reduce recall and prevent parts of compounds from being matched and highlighted.
- Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
-
- Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
- model with your own entries for segmentation, part-of-speech tags and readings without a need
- to specify weights. Notice that user dictionaries have not been subject to extensive testing.
-
- User dictionary attributes are:
- userDictionary: user dictionary filename
- userDictionaryEncoding: user dictionary encoding (default is UTF-8)
-
- See lang/userdict_ja.txt for a sample user dictionary file.
-
- Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
-
- See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
- -->
- <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
- <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
- <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
- <filter class="solr.JapaneseBaseFormFilterFactory"/>
- <!-- Removes tokens with certain part-of-speech tags -->
- <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" />
- <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
- <filter class="solr.CJKWidthFilterFactory"/>
- <!-- Removes common tokens that are typically not useful for search but have a negative effect on ranking -->
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" />
- <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
- <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
- <!-- Lower-cases romaji characters -->
- <filter class="solr.LowerCaseFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- Latvian -->
- <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" />
- <filter class="solr.LatvianStemFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- Dutch -->
- <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
- <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
- <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
- </analyzer>
- </fieldType>
-
- <!-- Norwegian -->
- <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
- <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
- <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> -->
- <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
- </analyzer>
- </fieldType>
-
- <!-- Portuguese -->
- <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
- <filter class="solr.PortugueseLightStemFilterFactory"/>
- <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
- <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
- <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
- </analyzer>
- </fieldType>
-
- <!-- Romanian -->
- <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" />
- <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
- </analyzer>
- </fieldType>
-
- <!-- Russian -->
- <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
- <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
- <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
- </analyzer>
- </fieldType>
-
- <!-- Swedish -->
- <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
- <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
- <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
- </analyzer>
- </fieldType>
-
- <!-- Thai -->
- <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.ThaiWordFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" />
- </analyzer>
- </fieldType>
-
- <!-- Turkish -->
- <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.TurkishLowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" />
- <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
- </analyzer>
- </fieldType>
-
- </types>
-
- <!-- Similarity is the scoring routine for each document vs. a query.
- A custom Similarity or SimilarityFactory may be specified here, but
- the default is fine for most applications.
- For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
- -->
- <!--
- <similarity class="com.example.solr.CustomSimilarityFactory">
- <str name="paramkey">param value</str>
- </similarity>
- -->
-
-</schema>
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!--
- For more details about configurations options that may appear in
- this file, see http://wiki.apache.org/solr/SolrConfigXml.
--->
-<config>
- <!-- In all configuration below, a prefix of "solr." for class names
- is an alias that causes solr to search appropriate packages,
- including org.apache.solr.(search|update|request|core|analysis)
-
- You may also specify a fully qualified Java classname if you
- have your own custom plugins.
- -->
-
- <!-- Controls what version of Lucene various components of Solr
- adhere to. Generally, you want to use the latest version to
- get all bug fixes and improvements. It is highly recommended
- that you fully re-index after changing this setting as it can
- affect both how text is indexed and queried.
- -->
- <luceneMatchVersion>4.4</luceneMatchVersion>
-
- <!-- <lib/> directives can be used to instruct Solr to load any Jars
- identified and use them to resolve any "plugins" specified in
- your solrconfig.xml or schema.xml (ie: Analyzers, Request
- Handlers, etc...).
-
- All directories and paths are resolved relative to the
- instanceDir.
-
- Please note that <lib/> directives are processed in the order
- that they appear in your solrconfig.xml file, and are "stacked"
- on top of each other when building a ClassLoader - so if you have
- plugin jars with dependencies on other jars, the "lower level"
- dependency jars should be loaded first.
-
- If a "./lib" directory exists in your instanceDir, all files
- found in it are included as if you had used the following
- syntax...
-
- <lib dir="./lib" />
- -->
-
- <!-- A 'dir' option by itself adds any files found in the directory
- to the classpath, this is useful for including all jars in a
- directory.
-
- When a 'regex' is specified in addition to a 'dir', only the
- files in that directory which completely match the regex
- (anchored on both ends) will be included.
-
- If a 'dir' option (with or without a regex) is used and nothing
- is found that matches, a warning will be logged.
-
- The examples below can be used to load some solr-contribs along
- with their external dependencies.
- -->
- <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
-
- <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
-
- <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
-
- <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
-
- <!-- an exact 'path' can be used instead of a 'dir' to specify a
- specific jar file. This will cause a serious error to be logged
- if it can't be loaded.
- -->
- <!--
- <lib path="../a-jar-that-does-not-exist.jar" />
- -->
-
- <!-- Data Directory
-
- Used to specify an alternate directory to hold all index data
- other than the default ./data under the Solr home. If
- replication is in use, this should match the replication
- configuration.
- -->
- <dataDir>${solr.data.dir:}</dataDir>
-
-
- <!-- The DirectoryFactory to use for indexes.
-
- solr.StandardDirectoryFactory is filesystem
- based and tries to pick the best implementation for the current
- JVM and platform. solr.NRTCachingDirectoryFactory, the default,
- wraps solr.StandardDirectoryFactory and caches small files in memory
- for better NRT performance.
-
- One can force a particular implementation via solr.MMapDirectoryFactory,
- solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
-
- solr.RAMDirectoryFactory is memory based, not
- persistent, and doesn't work with replication.
- -->
- <directoryFactory name="DirectoryFactory"
- class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
-
- <!-- The CodecFactory for defining the format of the inverted index.
- The default implementation is SchemaCodecFactory, which is the official Lucene
- index format, but hooks into the schema to provide per-field customization of
- the postings lists and per-document values in the fieldType element
- (postingsFormat/docValuesFormat). Note that most of the alternative implementations
- are experimental, so if you choose to customize the index format, it's a good
- idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
- before upgrading to a newer version to avoid unnecessary reindexing.
- -->
- <codecFactory class="solr.SchemaCodecFactory"/>
-
- <!-- To disable dynamic schema REST APIs, use the following for <schemaFactory>:
-
- <schemaFactory class="ClassicIndexSchemaFactory"/>
-
- When ManagedIndexSchemaFactory is specified instead, Solr will load the schema from
- the resource named in 'managedSchemaResourceName', rather than from schema.xml.
- Note that the managed schema resource CANNOT be named schema.xml. If the managed
- schema does not exist, Solr will create it after reading schema.xml, then rename
- 'schema.xml' to 'schema.xml.bak'.
-
- Do NOT hand edit the managed schema - external modifications will be ignored and
- overwritten as a result of schema modification REST API calls.
-
- When ManagedIndexSchemaFactory is specified with mutable = true, schema
- modification REST API calls will be allowed; otherwise, error responses will be
- sent back for these requests.
- -->
- <schemaFactory class="ManagedIndexSchemaFactory">
- <bool name="mutable">true</bool>
- <str name="managedSchemaResourceName">managed-schema</str>
- </schemaFactory>
-
- <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Index Config - These settings control low-level behavior of indexing
- Most example settings here show the default value, but are commented
- out, to more easily see where customizations have been made.
-
- Note: This replaces <indexDefaults> and <mainIndex> from older versions
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
- <indexConfig>
- <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a
- LimitTokenCountFilterFactory in your fieldType definition. E.g.
- <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
- -->
- <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
- <!-- <writeLockTimeout>1000</writeLockTimeout> -->
-
- <!-- The maximum number of simultaneous threads that may be
- indexing documents at once in IndexWriter; if more than this
- many threads arrive they will wait for others to finish.
- Default in Solr/Lucene is 8. -->
- <!-- <maxIndexingThreads>8</maxIndexingThreads> -->
-
- <!-- Expert: Enabling compound file will use fewer files for the index,
- using fewer file descriptors at the expense of decreased performance.
- Default in Lucene is "true". Default in Solr is "false" (since 3.6) -->
- <!-- <useCompoundFile>false</useCompoundFile> -->
-
- <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
- indexing for buffering added documents and deletions before they are
- flushed to the Directory.
- maxBufferedDocs sets a limit on the number of documents buffered
- before flushing.
- If both ramBufferSizeMB and maxBufferedDocs are set, then
- Lucene will flush based on whichever limit is hit first. -->
- <!-- <ramBufferSizeMB>100</ramBufferSizeMB> -->
- <!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
-
- <!-- Expert: Merge Policy
- The Merge Policy in Lucene controls how merging of segments is done.
- The default since Solr/Lucene 3.3 is TieredMergePolicy.
- The default since Lucene 2.3 was the LogByteSizeMergePolicy;
- even older versions of Lucene used LogDocMergePolicy.
- -->
- <!--
- <mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
- <int name="maxMergeAtOnce">10</int>
- <int name="segmentsPerTier">10</int>
- </mergePolicy>
- -->
-
- <!-- Merge Factor
- The merge factor controls how many segments will get merged at a time.
- For TieredMergePolicy, mergeFactor is a convenience parameter which
- will set both MaxMergeAtOnce and SegmentsPerTier at once.
- For LogByteSizeMergePolicy, mergeFactor decides how many new segments
- will be allowed before they are merged into one.
- Default is 10 for both merge policies.
- -->
- <!--
- <mergeFactor>10</mergeFactor>
- -->
-
- <!-- Expert: Merge Scheduler
- The Merge Scheduler in Lucene controls how merges are
- performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
- can perform merges in the background using separate threads.
- The SerialMergeScheduler (Lucene 2.2 default) does not.
- -->
- <!--
- <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
- -->
-
- <!-- LockFactory
-
- This option specifies which Lucene LockFactory implementation
- to use.
-
- single = SingleInstanceLockFactory - suggested for a
- read-only index or when there is no possibility of
- another process trying to modify the index.
- native = NativeFSLockFactory - uses OS native file locking.
- Do not use when multiple solr webapps in the same
- JVM are attempting to share a single index.
- simple = SimpleFSLockFactory - uses a plain file for locking
-
- Defaults: 'native' is default for Solr3.6 and later, otherwise
- 'simple' is the default
-
- More details on the nuances of each LockFactory...
- http://wiki.apache.org/lucene-java/AvailableLockFactories
- -->
- <lockType>${solr.lock.type:native}</lockType>
-
- <!-- Unlock On Startup
-
- If true, unlock any held write or commit locks on startup.
- This defeats the locking mechanism that allows multiple
- processes to safely access a lucene index, and should be used
- with care. Default is "false".
-
- This is not needed if lock type is 'single'
- -->
- <!--
- <unlockOnStartup>false</unlockOnStartup>
- -->
-
- <!-- Expert: Controls how often Lucene loads terms into memory
- Default is 128 and is likely good for most everyone.
- -->
- <!-- <termIndexInterval>128</termIndexInterval> -->
-
- <!-- If true, IndexReaders will be reopened (often more efficient)
- instead of closed and then opened. Default: true
- -->
- <!--
- <reopenReaders>true</reopenReaders>
- -->
-
- <!-- Commit Deletion Policy
- Custom deletion policies can be specified here. The class must
- implement org.apache.lucene.index.IndexDeletionPolicy.
-
- The default Solr IndexDeletionPolicy implementation supports
- deleting index commit points on number of commits, age of
- commit point and optimized status.
-
- The latest commit point should always be preserved regardless
- of the criteria.
- -->
- <!--
- <deletionPolicy class="solr.SolrDeletionPolicy">
- -->
- <!-- The number of commit points to be kept -->
- <!-- <str name="maxCommitsToKeep">1</str> -->
- <!-- The number of optimized commit points to be kept -->
- <!-- <str name="maxOptimizedCommitsToKeep">0</str> -->
- <!--
- Delete all commit points once they have reached the given age.
- Supports DateMathParser syntax e.g.
- -->
- <!--
- <str name="maxCommitAge">30MINUTES</str>
- <str name="maxCommitAge">1DAY</str>
- -->
- <!--
- </deletionPolicy>
- -->
-
- <!-- Lucene Infostream
-
- To aid in advanced debugging, Lucene provides an "InfoStream"
- of detailed information when indexing.
-
- Setting the value to true will instruct the underlying Lucene
- IndexWriter to write its debugging info to the specified file
- -->
- <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> -->
- </indexConfig>
-
-
- <!-- JMX
-
- This example enables JMX if and only if an existing MBeanServer
- is found, use this if you want to configure JMX through JVM
- parameters. Remove this to disable exposing Solr configuration
- and statistics to JMX.
-
- For more details see http://wiki.apache.org/solr/SolrJmx
- -->
- <jmx />
- <!-- If you want to connect to a particular server, specify the
- agentId
- -->
- <!-- <jmx agentId="myAgent" /> -->
- <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
- <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
- -->
-
- <!-- The default high-performance update handler -->
- <updateHandler class="solr.DirectUpdateHandler2">
-
- <!-- Enables a transaction log, used for real-time get, durability, and
- solr cloud replica recovery. The log can grow as big as
- uncommitted changes to the index, so use of a hard autoCommit
- is recommended (see below).
- "dir" - the target directory for transaction logs, defaults to the
- solr data directory. -->
- <updateLog>
- <str name="dir">${solr.ulog.dir:}</str>
- </updateLog>
-
- <!-- AutoCommit
-
- Perform a hard commit automatically under certain conditions.
- Instead of enabling autoCommit, consider using "commitWithin"
- when adding documents.
-
- http://wiki.apache.org/solr/UpdateXmlMessages
-
- maxDocs - Maximum number of documents to add since the last
- commit before automatically triggering a new commit.
-
- maxTime - Maximum amount of time in ms that is allowed to pass
- since a document was added before automatically
- triggering a new commit.
- openSearcher - if false, the commit causes recent index changes
- to be flushed to stable storage, but does not cause a new
- searcher to be opened to make those changes visible.
-
- If the updateLog is enabled, then it's highly recommended to
- have some sort of hard autoCommit to limit the log size.
- -->
- <autoCommit>
- <maxTime>15000</maxTime>
- <openSearcher>false</openSearcher>
- </autoCommit>
-
- <!-- softAutoCommit is like autoCommit except it causes a
- 'soft' commit which only ensures that changes are visible
- but does not ensure that data is synced to disk. This is
- faster and more near-realtime friendly than a hard commit.
- -->
- <!--
- <autoSoftCommit>
- <maxTime>1000</maxTime>
- </autoSoftCommit>
- -->
-
- <!-- Update Related Event Listeners
-
- Various IndexWriter related events can trigger Listeners to
- take actions.
-
- postCommit - fired after every commit or optimize command
- postOptimize - fired after every optimize command
- -->
- <!-- The RunExecutableListener executes an external command from a
- hook such as postCommit or postOptimize.
-
- exe - the name of the executable to run
- dir - dir to use as the current working directory. (default=".")
- wait - the calling thread waits until the executable returns.
- (default="true")
- args - the arguments to pass to the program. (default is none)
- env - environment variables to set. (default is none)
- -->
- <!-- This example shows how RunExecutableListener could be used
- with the script based replication...
- http://wiki.apache.org/solr/CollectionDistribution
- -->
- <!--
- <listener event="postCommit" class="solr.RunExecutableListener">
- <str name="exe">solr/bin/snapshooter</str>
- <str name="dir">.</str>
- <bool name="wait">true</bool>
- <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
- <arr name="env"> <str>MYVAR=val1</str> </arr>
- </listener>
- -->
-
- </updateHandler>
-
- <!-- IndexReaderFactory
-
- Use the following format to specify a custom IndexReaderFactory,
- which allows for alternate IndexReader implementations.
-
- ** Experimental Feature **
-
- Please note - Using a custom IndexReaderFactory may prevent
- certain other features from working. The API to
- IndexReaderFactory may change without warning or may even be
- removed from future releases if the problems cannot be
- resolved.
-
-
- ** Features that may not work with custom IndexReaderFactory **
-
- The ReplicationHandler assumes a disk-resident index. Using a
- custom IndexReader implementation may cause incompatibility
- with ReplicationHandler and may cause replication to not work
- correctly. See SOLR-1366 for details.
-
- -->
- <!--
- <indexReaderFactory name="IndexReaderFactory" class="package.class">
- <str name="someArg">Some Value</str>
- </indexReaderFactory >
- -->
- <!-- By explicitly declaring the Factory, the termIndexDivisor can
- be specified.
- -->
- <!--
- <indexReaderFactory name="IndexReaderFactory"
- class="solr.StandardIndexReaderFactory">
- <int name="setTermIndexDivisor">12</int>
- </indexReaderFactory >
- -->
-
- <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Query section - these settings control query time things like caches
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
- <query>
- <!-- Max Boolean Clauses
-
- Maximum number of clauses in each BooleanQuery, an exception
- is thrown if exceeded.
-
- ** WARNING **
-
- This option actually modifies a global Lucene property that
- will affect all SolrCores. If multiple solrconfig.xml files
- disagree on this property, the value at any given moment will
- be based on the last SolrCore to be initialized.
-
- -->
- <maxBooleanClauses>1024</maxBooleanClauses>
-
-
- <!-- Solr Internal Query Caches
-
- There are two implementations of cache available for Solr,
- LRUCache, based on a synchronized LinkedHashMap, and
- FastLRUCache, based on a ConcurrentHashMap.
-
- FastLRUCache has faster gets and slower puts in single
- threaded operation and thus is generally faster than LRUCache
- when the hit ratio of the cache is high (> 75%), and may be
- faster under other scenarios on multi-cpu systems.
- -->
-
- <!-- Filter Cache
-
- Cache used by SolrIndexSearcher for filters (DocSets),
- unordered sets of *all* documents that match a query. When a
- new searcher is opened, its caches may be prepopulated or
- "autowarmed" using data from caches in the old searcher.
- autowarmCount is the number of items to prepopulate. For
- LRUCache, the autowarmed items will be the most recently
- accessed items.
-
- Parameters:
- class - the SolrCache implementation
- (LRUCache or FastLRUCache)
- size - the maximum number of entries in the cache
- initialSize - the initial capacity (number of entries) of
- the cache. (see java.util.HashMap)
- autowarmCount - the number of entries to prepopulate from
- an old cache.
- -->
- <filterCache class="solr.FastLRUCache"
- size="512"
- initialSize="512"
- autowarmCount="0"/>
-
- <!-- Query Result Cache
-
- Caches results of searches - ordered lists of document ids
- (DocList) based on a query, a sort, and the range of documents requested.
- -->
- <queryResultCache class="solr.LRUCache"
- size="512"
- initialSize="512"
- autowarmCount="0"/>
-
- <!-- Document Cache
-
- Caches Lucene Document objects (the stored fields for each
- document). Since Lucene internal document ids are transient,
- this cache will not be autowarmed.
- -->
- <documentCache class="solr.LRUCache"
- size="512"
- initialSize="512"
- autowarmCount="0"/>
-
- <!-- Field Value Cache
-
- Cache used to hold field values that are quickly accessible
- by document id. The fieldValueCache is created by default
- even if not configured here.
- -->
- <!--
- <fieldValueCache class="solr.FastLRUCache"
- size="512"
- autowarmCount="128"
- showItems="32" />
- -->
-
- <!-- Custom Cache
-
- Example of a generic cache. These caches may be accessed by
- name through SolrIndexSearcher.getCache(),cacheLookup(), and
- cacheInsert(). The purpose is to enable easy caching of
- user/application level data. The regenerator argument should
- be specified as an implementation of solr.CacheRegenerator
- if autowarming is desired.
- -->
- <!--
- <cache name="myUserCache"
- class="solr.LRUCache"
- size="4096"
- initialSize="1024"
- autowarmCount="1024"
- regenerator="com.mycompany.MyRegenerator"
- />
- -->
-
-
- <!-- Lazy Field Loading
-
- If true, stored fields that are not requested will be loaded
- lazily. This can result in a significant speed improvement
- if the usual case is to not load all stored fields,
- especially if the skipped fields are large compressed text
- fields.
- -->
- <enableLazyFieldLoading>true</enableLazyFieldLoading>
-
- <!-- Use Filter For Sorted Query
-
- A possible optimization that attempts to use a filter to
- satisfy a search. If the requested sort does not include
- score, then the filterCache will be checked for a filter
- matching the query. If found, the filter will be used as the
- source of document ids, and then the sort will be applied to
- that.
-
- For most situations, this will not be useful unless you
- frequently get the same search repeatedly with different sort
- options, and none of them ever use "score"
- -->
- <!--
- <useFilterForSortedQuery>true</useFilterForSortedQuery>
- -->
-
- <!-- Result Window Size
-
- An optimization for use with the queryResultCache. When a search
- is requested, a superset of the requested number of document ids
- are collected. For example, if a search for a particular query
- requests matching documents 10 through 19, and queryResultWindowSize is 50,
- then documents 0 through 49 will be collected and cached. Any further
- requests in that range can be satisfied via the cache.
- -->
- <queryResultWindowSize>20</queryResultWindowSize>
-
- <!-- Maximum number of documents to cache for any entry in the
- queryResultCache.
- -->
- <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
-
- <!-- Query Related Event Listeners
-
- Various IndexSearcher related events can trigger Listeners to
- take actions.
-
- newSearcher - fired whenever a new searcher is being prepared
- and there is a current searcher handling requests (aka
- registered). It can be used to prime certain caches to
- prevent long request times for certain requests.
-
- firstSearcher - fired whenever a new searcher is being
- prepared but there is no current registered searcher to handle
- requests or to gain autowarming data from.
-
-
- -->
- <!-- QuerySenderListener takes an array of NamedList and executes a
- local query request for each NamedList in sequence.
- -->
- <listener event="newSearcher" class="solr.QuerySenderListener">
- <arr name="queries">
- <!--
- <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
- <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
- -->
- </arr>
- </listener>
- <listener event="firstSearcher" class="solr.QuerySenderListener">
- <arr name="queries">
- <!--
- <lst>
- <str name="q">static firstSearcher warming in solrconfig.xml</str>
- </lst>
- -->
- </arr>
- </listener>
-
- <!-- Use Cold Searcher
-
- If a search request comes in and there is no current
- registered searcher, then immediately register the still
- warming searcher and use it. If "false" then all requests
- will block until the first searcher is done warming.
- -->
- <useColdSearcher>false</useColdSearcher>
-
- <!-- Max Warming Searchers
-
- Maximum number of searchers that may be warming in the
- background concurrently. An error is returned if this limit
- is exceeded.
-
- Recommend values of 1-2 for read-only slaves, higher for
- masters w/o cache warming.
- -->
- <maxWarmingSearchers>2</maxWarmingSearchers>
-
- </query>
-
-
- <!-- Request Dispatcher
-
- This section contains instructions for how the SolrDispatchFilter
- should behave when processing requests for this SolrCore.
-
- handleSelect is a legacy option that affects the behavior of requests
- such as /select?qt=XXX
-
- handleSelect="true" will cause the SolrDispatchFilter to process
- the request and dispatch the query to a handler specified by the
- "qt" param, assuming "/select" isn't already registered.
-
- handleSelect="false" will cause the SolrDispatchFilter to
- ignore "/select" requests, resulting in a 404 unless a handler
- is explicitly registered with the name "/select"
-
- handleSelect="true" is not recommended for new users, but is the default
- for backwards compatibility
- -->
- <requestDispatcher handleSelect="false" >
- <!-- Request Parsing
-
- These settings indicate how Solr Requests may be parsed, and
- what restrictions may be placed on the ContentStreams from
- those requests
-
- enableRemoteStreaming - enables use of the stream.file
- and stream.url parameters for specifying remote streams.
-
- multipartUploadLimitInKB - specifies the max size (in KiB) of
- Multipart File Uploads that Solr will allow in a Request.
-
- formdataUploadLimitInKB - specifies the max size (in KiB) of
- form data (application/x-www-form-urlencoded) sent via
- POST. You can use POST to pass request parameters not
- fitting into the URL.
-
- addHttpRequestToContext - if set to true, it will instruct
- the requestParsers to include the original HttpServletRequest
- object in the context map of the SolrQueryRequest under the
- key "httpRequest". It will not be used by any of the existing
- Solr components, but may be useful when developing custom
- plugins.
-
- *** WARNING ***
- The settings below authorize Solr to fetch remote files. You
- should make sure your system has some authentication before
- using enableRemoteStreaming="true"
-
- -->
- <requestParsers enableRemoteStreaming="true"
- multipartUploadLimitInKB="2048000"
- formdataUploadLimitInKB="2048"
- addHttpRequestToContext="false"/>
-
- <!-- HTTP Caching
-
- Set HTTP caching related parameters (for proxy caches and clients).
-
- The options below instruct Solr not to output any HTTP Caching
- related headers
- -->
- <httpCaching never304="true" />
- <!-- If you include a <cacheControl> directive, it will be used to
- generate a Cache-Control header (as well as an Expires header
- if the value contains "max-age=")
-
- By default, no Cache-Control header is generated.
-
- You can use the <cacheControl> option even if you have set
- never304="true"
- -->
- <!--
- <httpCaching never304="true" >
- <cacheControl>max-age=30, public</cacheControl>
- </httpCaching>
- -->
- <!-- To enable Solr to respond with automatically generated HTTP
- Caching headers, and to respond to Cache Validation requests
- correctly, set the value of never304="false"
-
- This will cause Solr to generate Last-Modified and ETag
- headers based on the properties of the Index.
-
- The following options can also be specified to affect the
- values of these headers...
-
- lastModifiedFrom - the default value is "openTime" which means the
- Last-Modified value (and validation against If-Modified-Since
- requests) will all be relative to when the current Searcher
- was opened. You can change it to lastModifiedFrom="dirLastMod" if
- you want the value to exactly correspond to when the physical
- index was last modified.
-
- etagSeed="..." is an option you can change to force the ETag
- header (and validation against If-None-Match requests) to be
- different even if the index has not changed (ie: when making
- significant changes to your config file)
-
- (lastModifiedFrom and etagSeed are both ignored if you use
- the never304="true" option)
- -->
- <!--
- <httpCaching lastModifiedFrom="openTime"
- etagSeed="Solr">
- <cacheControl>max-age=30, public</cacheControl>
- </httpCaching>
- -->
- </requestDispatcher>
-
- <!-- Request Handlers
-
- http://wiki.apache.org/solr/SolrRequestHandler
-
- Incoming queries will be dispatched to a specific handler by name
- based on the path specified in the request.
-
- Legacy behavior: If the request path uses "/select" but no Request
- Handler has that name, and if handleSelect="true" has been specified in
- the requestDispatcher, then the Request Handler is dispatched based on
- the qt parameter. Handlers without a leading '/' are accessed this way
- like so: http://host/app/[core/]select?qt=name If no qt is
- given, then the requestHandler that declares default="true" will be
- used or the one named "standard".
-
- If a Request Handler is declared with startup="lazy", then it will
- not be initialized until the first request that uses it.
-
- -->
- <!-- SearchHandler
-
- http://wiki.apache.org/solr/SearchHandler
-
- For processing Search Queries, the primary Request Handler
- provided with Solr is "SearchHandler". It delegates to a sequence
- of SearchComponents (see below) and supports distributed
- queries across multiple shards
- -->
- <requestHandler name="/select" class="solr.SearchHandler">
- <!-- default values for query parameters can be specified, these
- will be overridden by parameters in the request
- -->
- <lst name="defaults">
- <str name="echoParams">explicit</str>
- <int name="rows">10</int>
- <!-- <str name="df">text</str> -->
- </lst>
- <!-- In addition to defaults, "appends" params can be specified
- to identify values which should be appended to the list of
- multi-val params from the query (or the existing "defaults").
- -->
- <!-- In this example, the param "fq=inStock:true" would be appended to
- any query time fq params the user may specify, as a mechanism for
- partitioning the index, independent of any user selected filtering
- that may also be desired (perhaps as a result of faceted searching).
-
- NOTE: there is *absolutely* nothing a client can do to prevent these
- "appends" values from being used, so don't use this mechanism
- unless you are sure you always want it.
- -->
- <!--
- <lst name="appends">
- <str name="fq">inStock:true</str>
- </lst>
- -->
- <!-- "invariants" are a way of letting the Solr maintainer lock down
- the options available to Solr clients. Any params values
- specified here are used regardless of what values may be specified
- in either the query, the "defaults", or the "appends" params.
-
- In this example, the facet.field and facet.query params would
- be fixed, limiting the facets clients can use. Faceting is
- not turned on by default - but if the client does specify
- facet=true in the request, these are the only facets they
- will be able to see counts for; regardless of what other
- facet.field or facet.query params they may specify.
-
- NOTE: there is *absolutely* nothing a client can do to prevent these
- "invariants" values from being used, so don't use this mechanism
- unless you are sure you always want it.
- -->
- <!--
- <lst name="invariants">
- <str name="facet.field">cat</str>
- <str name="facet.field">manu_exact</str>
- <str name="facet.query">price:[* TO 500]</str>
- <str name="facet.query">price:[500 TO *]</str>
- </lst>
- -->
- <!-- If the default list of SearchComponents is not desired, that
- list can either be overridden completely, or components can be
- prepended or appended to the default list. (see below)
- -->
- <!--
- <arr name="components">
- <str>nameOfCustomComponent1</str>
- <str>nameOfCustomComponent2</str>
- </arr>
- -->
- </requestHandler>
-
- <!-- A request handler that returns indented JSON by default -->
- <requestHandler name="/query" class="solr.SearchHandler">
- <lst name="defaults">
- <str name="echoParams">explicit</str>
- <str name="wt">json</str>
- <str name="indent">true</str>
- <str name="df">text</str>
- </lst>
- </requestHandler>
-
-
- <!-- realtime get handler, guaranteed to return the latest stored fields of
- any document, without the need to commit or open a new searcher. The
- current implementation relies on the updateLog feature being enabled. -->
- <requestHandler name="/get" class="solr.RealTimeGetHandler">
- <lst name="defaults">
- <str name="omitHeader">true</str>
- <str name="wt">json</str>
- <str name="indent">true</str>
- </lst>
- </requestHandler>
-
-
- <!-- A Robust Example
-
- This example SearchHandler declaration shows off usage of the
- SearchHandler with many defaults declared
-
- Note that multiple instances of the same Request Handler
- (SearchHandler) can be registered multiple times with different
- names (and different init parameters)
- -->
- <requestHandler name="/browse" class="solr.SearchHandler">
- <lst name="defaults">
- <str name="echoParams">explicit</str>
-
- <!-- VelocityResponseWriter settings -->
- <str name="wt">velocity</str>
- <str name="v.template">browse</str>
- <str name="v.layout">layout</str>
- <str name="title">Solritas</str>
-
- <!-- Query settings -->
- <str name="defType">edismax</str>
- <str name="qf">
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
- title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
- </str>
- <str name="df">text</str>
- <str name="mm">100%</str>
- <str name="q.alt">*:*</str>
- <str name="rows">10</str>
- <str name="fl">*,score</str>
-
- <str name="mlt.qf">
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
- title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
- </str>
- <str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str>
- <int name="mlt.count">3</int>
-
- <!-- Faceting defaults -->
- <str name="facet">on</str>
- <str name="facet.field">cat</str>
- <str name="facet.field">manu_exact</str>
- <str name="facet.field">content_type</str>
- <str name="facet.field">author_s</str>
- <str name="facet.query">ipod</str>
- <str name="facet.query">GB</str>
- <str name="facet.mincount">1</str>
- <str name="facet.pivot">cat,inStock</str>
- <str name="facet.range.other">after</str>
- <str name="facet.range">price</str>
- <int name="f.price.facet.range.start">0</int>
- <int name="f.price.facet.range.end">600</int>
- <int name="f.price.facet.range.gap">50</int>
- <str name="facet.range">popularity</str>
- <int name="f.popularity.facet.range.start">0</int>
- <int name="f.popularity.facet.range.end">10</int>
- <int name="f.popularity.facet.range.gap">3</int>
- <str name="facet.range">manufacturedate_dt</str>
- <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
- <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
- <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
- <str name="f.manufacturedate_dt.facet.range.other">before</str>
- <str name="f.manufacturedate_dt.facet.range.other">after</str>
-
- <!-- Highlighting defaults -->
- <str name="hl">on</str>
- <str name="hl.fl">content features title name</str>
- <str name="hl.encoder">html</str>
- <str name="hl.simple.pre"><b></str>
- <str name="hl.simple.post"></b></str>
- <str name="f.title.hl.fragsize">0</str>
- <str name="f.title.hl.alternateField">title</str>
- <str name="f.name.hl.fragsize">0</str>
- <str name="f.name.hl.alternateField">name</str>
- <str name="f.content.hl.snippets">3</str>
- <str name="f.content.hl.fragsize">200</str>
- <str name="f.content.hl.alternateField">content</str>
- <str name="f.content.hl.maxAlternateFieldLength">750</str>
-
- <!-- Spell checking defaults -->
- <str name="spellcheck">on</str>
- <str name="spellcheck.extendedResults">false</str>
- <str name="spellcheck.count">5</str>
- <str name="spellcheck.alternativeTermCount">2</str>
- <str name="spellcheck.maxResultsForSuggest">5</str>
- <str name="spellcheck.collate">true</str>
- <str name="spellcheck.collateExtendedResults">true</str>
- <str name="spellcheck.maxCollationTries">5</str>
- <str name="spellcheck.maxCollations">3</str>
- </lst>
-
- <!-- append spellchecking to our list of components -->
- <arr name="last-components">
- <str>spellcheck</str>
- </arr>
- </requestHandler>
-
-
- <!-- Update Request Handler.
-
- http://wiki.apache.org/solr/UpdateXmlMessages
-
- The canonical Request Handler for Modifying the Index through
- commands specified using XML, JSON, CSV, or JAVABIN
-
- Note: Since solr1.1 requestHandlers require a valid content
- type header if posted in the body. For example, curl now
- requires: -H 'Content-type:text/xml; charset=utf-8'
-
- To override the request content type and force a specific
- Content-type, use the request parameter:
- ?update.contentType=text/csv
-
- This handler will pick a response format to match the input
- if the 'wt' parameter is not explicit
- -->
- <requestHandler name="/update" class="solr.UpdateRequestHandler">
- <!-- See below for information on defining
- updateRequestProcessorChains that can be used by name
- on each Update Request
- -->
- <lst name="defaults">
- <str name="update.chain">add-unknown-fields-to-the-schema</str>
- </lst>
- </requestHandler>
-
- <!-- for back compat with clients using /update/json and /update/csv -->
- <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler">
- <lst name="defaults">
- <str name="stream.contentType">application/json</str>
- <str name="update.chain">add-unknown-fields-to-the-schema</str>
- </lst>
- </requestHandler>
- <requestHandler name="/update/csv" class="solr.CSVRequestHandler">
- <lst name="defaults">
- <str name="stream.contentType">application/csv</str>
- <str name="update.chain">add-unknown-fields-to-the-schema</str>
- </lst>
- </requestHandler>
-
- <!-- Solr Cell Update Request Handler
-
- http://wiki.apache.org/solr/ExtractingRequestHandler
-
- -->
- <requestHandler name="/update/extract"
- startup="lazy"
- class="solr.extraction.ExtractingRequestHandler" >
- <lst name="defaults">
- <str name="lowernames">true</str>
- <str name="uprefix">ignored_</str>
-
- <!-- capture link hrefs but ignore div attributes -->
- <str name="captureAttr">true</str>
- <str name="fmap.a">links</str>
- <str name="fmap.div">ignored_</str>
- </lst>
- </requestHandler>
-
-
- <!-- Field Analysis Request Handler
-
- RequestHandler that provides much the same functionality as
- analysis.jsp. Provides the ability to specify multiple field
- types and field names in the same request and outputs
- index-time and query-time analysis for each of them.
-
- Request parameters are:
- analysis.fieldname - field name whose analyzers are to be used
-
- analysis.fieldtype - field type whose analyzers are to be used
- analysis.fieldvalue - text for index-time analysis
- q (or analysis.q) - text for query time analysis
- analysis.showmatch (true|false) - When set to true and when
- query analysis is performed, the produced tokens of the
- field value analysis will be marked as "matched" for every
- token that is produced by the query analysis
- -->
- <requestHandler name="/analysis/field"
- startup="lazy"
- class="solr.FieldAnalysisRequestHandler" />
-
-
- <!-- Document Analysis Handler
-
- http://wiki.apache.org/solr/AnalysisRequestHandler
-
- An analysis handler that provides a breakdown of the analysis
- process of provided documents. This handler expects a (single)
- content stream with the following format:
-
- <docs>
- <doc>
- <field name="id">1</field>
- <field name="name">The Name</field>
- <field name="text">The Text Value</field>
- </doc>
- <doc>...</doc>
- <doc>...</doc>
- ...
- </docs>
-
- Note: Each document must contain a field which serves as the
- unique key. This key is used in the returned response to associate
- an analysis breakdown to the analyzed document.
-
- Like the FieldAnalysisRequestHandler, this handler also supports
- query analysis by sending either an "analysis.query" or "q"
- request parameter that holds the query text to be analyzed. It
- also supports the "analysis.showmatch" parameter which when set to
- true, all field tokens that match the query tokens will be marked
- as a "match".
- -->
- <requestHandler name="/analysis/document"
- class="solr.DocumentAnalysisRequestHandler"
- startup="lazy" />
-
- <!-- Admin Handlers
-
- Admin Handlers - This will register all the standard admin
- RequestHandlers.
- -->
- <requestHandler name="/admin/"
- class="solr.admin.AdminHandlers" />
- <!-- This single handler is equivalent to the following... -->
- <!--
- <requestHandler name="/admin/luke" class="solr.admin.LukeRequestHandler" />
- <requestHandler name="/admin/system" class="solr.admin.SystemInfoHandler" />
- <requestHandler name="/admin/plugins" class="solr.admin.PluginInfoHandler" />
- <requestHandler name="/admin/threads" class="solr.admin.ThreadDumpHandler" />
- <requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" />
- <requestHandler name="/admin/file" class="solr.admin.ShowFileRequestHandler" />
- -->
- <!-- If you wish to hide files under ${solr.home}/conf, explicitly
- register the ShowFileRequestHandler using:
- -->
- <!--
- <requestHandler name="/admin/file"
- class="solr.admin.ShowFileRequestHandler" >
- <lst name="invariants">
- <str name="hidden">synonyms.txt</str>
- <str name="hidden">anotherfile.txt</str>
- </lst>
- </requestHandler>
- -->
-
- <!-- ping/healthcheck -->
- <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
- <lst name="invariants">
- <str name="q">solrpingquery</str>
- </lst>
- <lst name="defaults">
- <str name="echoParams">all</str>
- </lst>
- <!-- An optional feature of the PingRequestHandler is to configure the
- handler with a "healthcheckFile" which can be used to enable/disable
- the PingRequestHandler.
- relative paths are resolved against the data dir
- -->
- <!-- <str name="healthcheckFile">server-enabled.txt</str> -->
- </requestHandler>
-
- <!-- Echo the request contents back to the client -->
- <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
- <lst name="defaults">
- <str name="echoParams">explicit</str>
- <str name="echoHandler">true</str>
- </lst>
- </requestHandler>
-
- <!-- Solr Replication
-
- The SolrReplicationHandler supports replicating indexes from a
- "master" used for indexing and "slaves" used for queries.
-
- http://wiki.apache.org/solr/SolrReplication
-
- It is also necessary for SolrCloud to function (in Cloud mode, the
- replication handler is used to bulk transfer segments when nodes
- are added or need to recover).
-
- https://wiki.apache.org/solr/SolrCloud/
- -->
- <requestHandler name="/replication" class="solr.ReplicationHandler" >
- <!--
- To enable simple master/slave replication, uncomment one of the
- sections below, depending on whether this solr instance should be
- the "master" or a "slave". If this instance is a "slave" you will
- also need to fill in the masterUrl to point to a real machine.
- -->
- <!--
- <lst name="master">
- <str name="replicateAfter">commit</str>
- <str name="replicateAfter">startup</str>
- <str name="confFiles">schema.xml,stopwords.txt</str>
- </lst>
- -->
- <!--
- <lst name="slave">
- <str name="masterUrl">http://your-master-hostname:8983/solr</str>
- <str name="pollInterval">00:00:60</str>
- </lst>
- -->
- </requestHandler>
-
- <!-- Search Components
-
- Search components are registered to SolrCore and used by
- instances of SearchHandler (which can access them by name)
-
- By default, the following components are available:
-
- <searchComponent name="query" class="solr.QueryComponent" />
- <searchComponent name="facet" class="solr.FacetComponent" />
- <searchComponent name="mlt" class="solr.MoreLikeThisComponent" />
- <searchComponent name="highlight" class="solr.HighlightComponent" />
- <searchComponent name="stats" class="solr.StatsComponent" />
- <searchComponent name="debug" class="solr.DebugComponent" />
-
- Default configuration in a requestHandler would look like:
-
- <arr name="components">
- <str>query</str>
- <str>facet</str>
- <str>mlt</str>
- <str>highlight</str>
- <str>stats</str>
- <str>debug</str>
- </arr>
-
- If you register a searchComponent to one of the standard names,
- that will be used instead of the default.
-
- To insert components before or after the 'standard' components, use:
-
- <arr name="first-components">
- <str>myFirstComponentName</str>
- </arr>
-
- <arr name="last-components">
- <str>myLastComponentName</str>
- </arr>
-
- NOTE: The component registered with the name "debug" will
- always be executed after the "last-components"
-
- -->
-
- <!-- Spell Check
-
- The spell check component can return a list of alternative spelling
- suggestions.
-
- http://wiki.apache.org/solr/SpellCheckComponent
- -->
- <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
-
- <str name="queryAnalyzerFieldType">text_general</str>
-
- <!-- Multiple "Spell Checkers" can be declared and used by this
- component
- -->
-
- <!-- a spellchecker built from a field of the main index -->
- <lst name="spellchecker">
- <str name="name">default</str>
- <str name="field">text</str>
- <str name="classname">solr.DirectSolrSpellChecker</str>
- <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
- <str name="distanceMeasure">internal</str>
- <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
- <float name="accuracy">0.5</float>
- <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
- <int name="maxEdits">2</int>
- <!-- the minimum shared prefix when enumerating terms -->
- <int name="minPrefix">1</int>
- <!-- maximum number of inspections per result. -->
- <int name="maxInspections">5</int>
- <!-- minimum length of a query term to be considered for correction -->
- <int name="minQueryLength">4</int>
- <!-- maximum threshold of documents a query term can appear in to be considered for correction -->
- <float name="maxQueryFrequency">0.01</float>
- <!-- uncomment this to require suggestions to occur in 1% of the documents
- <float name="thresholdTokenFrequency">.01</float>
- -->
- </lst>
-
- <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
- <lst name="spellchecker">
- <str name="name">wordbreak</str>
- <str name="classname">solr.WordBreakSolrSpellChecker</str>
- <str name="field">name</str>
- <str name="combineWords">true</str>
- <str name="breakWords">true</str>
- <int name="maxChanges">10</int>
- </lst>
-
- <!-- a spellchecker that uses a different distance measure -->
- <!--
- <lst name="spellchecker">
- <str name="name">jarowinkler</str>
- <str name="field">spell</str>
- <str name="classname">solr.DirectSolrSpellChecker</str>
- <str name="distanceMeasure">
- org.apache.lucene.search.spell.JaroWinklerDistance
- </str>
- </lst>
- -->
-
- <!-- a spellchecker that uses an alternate comparator
-
- comparatorClass can be one of:
- 1. score (default)
- 2. freq (Frequency first, then score)
- 3. A fully qualified class name
- -->
- <!--
- <lst name="spellchecker">
- <str name="name">freq</str>
- <str name="field">lowerfilt</str>
- <str name="classname">solr.DirectSolrSpellChecker</str>
- <str name="comparatorClass">freq</str>
- </lst>
- -->
-
- <!-- A spellchecker that reads the list of words from a file -->
- <!--
- <lst name="spellchecker">
- <str name="classname">solr.FileBasedSpellChecker</str>
- <str name="name">file</str>
- <str name="sourceLocation">spellings.txt</str>
- <str name="characterEncoding">UTF-8</str>
- <str name="spellcheckIndexDir">spellcheckerFile</str>
- </lst>
- -->
- </searchComponent>
-
- <!-- A request handler for demonstrating the spellcheck component.
-
- NOTE: This is purely as an example. The whole purpose of the
- SpellCheckComponent is to hook it into the request handler that
- handles your normal user queries so that a separate request is
- not needed to get suggestions.
-
- IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS
- NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
-
- See http://wiki.apache.org/solr/SpellCheckComponent for details
- on the request parameters.
- -->
- <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
- <lst name="defaults">
- <str name="df">text</str>
- <!-- Solr will use suggestions from both the 'default' spellchecker
- and from the 'wordbreak' spellchecker and combine them.
- collations (re-written queries) can include a combination of
- corrections from both spellcheckers -->
- <str name="spellcheck.dictionary">default</str>
- <str name="spellcheck.dictionary">wordbreak</str>
- <str name="spellcheck">on</str>
- <str name="spellcheck.extendedResults">true</str>
- <str name="spellcheck.count">10</str>
- <str name="spellcheck.alternativeTermCount">5</str>
- <str name="spellcheck.maxResultsForSuggest">5</str>
- <str name="spellcheck.collate">true</str>
- <str name="spellcheck.collateExtendedResults">true</str>
- <str name="spellcheck.maxCollationTries">10</str>
- <str name="spellcheck.maxCollations">5</str>
- </lst>
- <arr name="last-components">
- <str>spellcheck</str>
- </arr>
- </requestHandler>
-
- <!-- Term Vector Component
-
- http://wiki.apache.org/solr/TermVectorComponent
- -->
- <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
-
- <!-- A request handler for demonstrating the term vector component
-
- This is purely as an example.
-
- In reality you will likely want to add the component to your
- already specified request handlers.
- -->
- <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
- <lst name="defaults">
- <str name="df">text</str>
- <bool name="tv">true</bool>
- </lst>
- <arr name="last-components">
- <str>tvComponent</str>
- </arr>
- </requestHandler>
-
- <!-- Clustering Component
-
- http://wiki.apache.org/solr/ClusteringComponent
-
- You'll need to set the solr.clustering.enabled system property
- when running solr to run with clustering enabled:
-
- java -Dsolr.clustering.enabled=true -jar start.jar
-
- -->
- <searchComponent name="clustering"
- enable="${solr.clustering.enabled:false}"
- class="solr.clustering.ClusteringComponent" >
- <!-- Declare an engine -->
- <lst name="engine">
- <!-- The name, only one can be named "default" -->
- <str name="name">default</str>
-
- <!-- Class name of Carrot2 clustering algorithm.
-
- Currently available algorithms are:
-
- * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
- * org.carrot2.clustering.stc.STCClusteringAlgorithm
- * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
-
- See http://project.carrot2.org/algorithms.html for the
- algorithm's characteristics.
- -->
- <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
-
- <!-- Overriding values for Carrot2 default algorithm attributes.
-
- For a description of all available attributes, see:
- http://download.carrot2.org/stable/manual/#chapter.components.
- Use attribute key as name attribute of str elements
- below. These can be further overridden for individual
- requests by specifying attribute key as request parameter
- name and attribute value as parameter value.
- -->
- <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
-
- <!-- Location of Carrot2 lexical resources.
-
- A directory from which to load Carrot2-specific stop words
- and stop labels. Absolute or relative to Solr config directory.
- If a specific resource (e.g. stopwords.en) is present in the
- specified dir, it will completely override the corresponding
- default one that ships with Carrot2.
-
- For an overview of Carrot2 lexical resources, see:
- http://download.carrot2.org/head/manual/#chapter.lexical-resources
- -->
- <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
-
- <!-- The language to assume for the documents.
-
- For a list of allowed values, see:
- http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
- -->
- <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
- </lst>
- <lst name="engine">
- <str name="name">stc</str>
- <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
- </lst>
- </searchComponent>
-
- <!-- A request handler for demonstrating the clustering component
-
- This is purely as an example.
-
- In reality you will likely want to add the component to your
- already specified request handlers.
- -->
- <requestHandler name="/clustering"
- startup="lazy"
- enable="${solr.clustering.enabled:false}"
- class="solr.SearchHandler">
- <lst name="defaults">
- <bool name="clustering">true</bool>
- <str name="clustering.engine">default</str>
- <bool name="clustering.results">true</bool>
- <!-- The title field -->
- <str name="carrot.title">name</str>
- <str name="carrot.url">id</str>
- <!-- The field to cluster on -->
- <str name="carrot.snippet">features</str>
- <!-- produce summaries -->
- <bool name="carrot.produceSummary">true</bool>
- <!-- the maximum number of labels per cluster -->
- <!--<int name="carrot.numDescriptions">5</int>-->
- <!-- produce sub clusters -->
- <bool name="carrot.outputSubClusters">false</bool>
-
- <str name="defType">edismax</str>
- <str name="qf">
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
- </str>
- <str name="q.alt">*:*</str>
- <str name="rows">10</str>
- <str name="fl">*,score</str>
- </lst>
- <arr name="last-components">
- <str>clustering</str>
- </arr>
- </requestHandler>
-
- <!-- Terms Component
-
- http://wiki.apache.org/solr/TermsComponent
-
- A component to return terms and document frequency of those
- terms
- -->
- <searchComponent name="terms" class="solr.TermsComponent"/>
-
- <!-- A request handler for demonstrating the terms component -->
- <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
- <lst name="defaults">
- <bool name="terms">true</bool>
- <bool name="distrib">false</bool>
- </lst>
- <arr name="components">
- <str>terms</str>
- </arr>
- </requestHandler>
-
-
- <!-- Query Elevation Component
-
- http://wiki.apache.org/solr/QueryElevationComponent
-
- a search component that enables you to configure the top
- results for a given query regardless of the normal lucene
- scoring.
- -->
- <searchComponent name="elevator" class="solr.QueryElevationComponent" >
- <!-- pick a fieldType to analyze queries -->
- <str name="queryFieldType">string</str>
- <str name="config-file">elevate.xml</str>
- </searchComponent>
-
- <!-- A request handler for demonstrating the elevator component -->
- <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
- <lst name="defaults">
- <str name="echoParams">explicit</str>
- <str name="df">text</str>
- </lst>
- <arr name="last-components">
- <str>elevator</str>
- </arr>
- </requestHandler>
-
- <!-- Highlighting Component
-
- http://wiki.apache.org/solr/HighlightingParameters
- -->
- <searchComponent class="solr.HighlightComponent" name="highlight">
- <highlighting>
- <!-- Configure the standard fragmenter -->
- <!-- This could most likely be commented out in the "default" case -->
- <fragmenter name="gap"
- default="true"
- class="solr.highlight.GapFragmenter">
- <lst name="defaults">
- <int name="hl.fragsize">100</int>
- </lst>
- </fragmenter>
-
- <!-- A regular-expression-based fragmenter
- (for sentence extraction)
- -->
- <fragmenter name="regex"
- class="solr.highlight.RegexFragmenter">
- <lst name="defaults">
- <!-- slightly smaller fragsizes work better because of slop -->
- <int name="hl.fragsize">70</int>
- <!-- allow 50% slop on fragment sizes -->
- <float name="hl.regex.slop">0.5</float>
- <!-- a basic sentence pattern -->
- <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
- </lst>
- </fragmenter>
-
- <!-- Configure the standard formatter -->
- <formatter name="html"
- default="true"
- class="solr.highlight.HtmlFormatter">
- <lst name="defaults">
- <str name="hl.simple.pre"><![CDATA[<em>]]></str>
- <str name="hl.simple.post"><![CDATA[</em>]]></str>
- </lst>
- </formatter>
-
- <!-- Configure the standard encoder -->
- <encoder name="html"
- class="solr.highlight.HtmlEncoder" />
-
- <!-- Configure the standard fragListBuilder -->
- <fragListBuilder name="simple"
- class="solr.highlight.SimpleFragListBuilder"/>
-
- <!-- Configure the single fragListBuilder -->
- <fragListBuilder name="single"
- class="solr.highlight.SingleFragListBuilder"/>
-
- <!-- Configure the weighted fragListBuilder -->
- <fragListBuilder name="weighted"
- default="true"
- class="solr.highlight.WeightedFragListBuilder"/>
-
- <!-- default tag FragmentsBuilder -->
- <fragmentsBuilder name="default"
- default="true"
- class="solr.highlight.ScoreOrderFragmentsBuilder">
- <!--
- <lst name="defaults">
- <str name="hl.multiValuedSeparatorChar">/</str>
- </lst>
- -->
- </fragmentsBuilder>
-
- <!-- multi-colored tag FragmentsBuilder -->
- <fragmentsBuilder name="colored"
- class="solr.highlight.ScoreOrderFragmentsBuilder">
- <lst name="defaults">
- <str name="hl.tag.pre"><![CDATA[
- <b style="background:yellow">,<b style="background:lawgreen">,
- <b style="background:aquamarine">,<b style="background:magenta">,
- <b style="background:palegreen">,<b style="background:coral">,
- <b style="background:wheat">,<b style="background:khaki">,
- <b style="background:lime">,<b style="background:deepskyblue">]]></str>
- <str name="hl.tag.post"><![CDATA[</b>]]></str>
- </lst>
- </fragmentsBuilder>
-
- <boundaryScanner name="default"
- default="true"
- class="solr.highlight.SimpleBoundaryScanner">
- <lst name="defaults">
- <str name="hl.bs.maxScan">10</str>
- <str name="hl.bs.chars">.,!? 	 </str>
- </lst>
- </boundaryScanner>
-
- <boundaryScanner name="breakIterator"
- class="solr.highlight.BreakIteratorBoundaryScanner">
- <lst name="defaults">
- <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
- <str name="hl.bs.type">WORD</str>
- <!-- language and country are used when constructing Locale object. -->
- <!-- And the Locale object will be used when getting instance of BreakIterator -->
- <str name="hl.bs.language">en</str>
- <str name="hl.bs.country">US</str>
- </lst>
- </boundaryScanner>
- </highlighting>
- </searchComponent>
-
- <!-- Update Processors
-
- Chains of Update Processor Factories for dealing with Update
- Requests can be declared, and then used by name in Update
- Request Processors
-
- http://wiki.apache.org/solr/UpdateRequestProcessor
-
- -->
-
- <!-- Add unknown fields to the schema
-
- An example field type guessing update processor that will
- attempt to parse string-typed field values as Booleans, Longs,
- Doubles, or Dates, and then add schema fields with the guessed
- field types.
-
- This requires that the schema is both managed and mutable, by
- declaring schemaFactory as ManagedIndexSchemaFactory, with
- mutable specified as true.
-
- See http://wiki.apache.org/solr/GuessingFieldTypes
- -->
- <updateRequestProcessorChain name="add-unknown-fields-to-the-schema">
- <processor class="solr.RemoveBlankFieldUpdateProcessorFactory"/>
- <processor class="solr.ParseBooleanFieldUpdateProcessorFactory"/>
- <processor class="solr.ParseLongFieldUpdateProcessorFactory"/>
- <processor class="solr.ParseDoubleFieldUpdateProcessorFactory"/>
- <processor class="solr.ParseDateFieldUpdateProcessorFactory">
- <arr name="format">
- <str>yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
- <str>yyyy-MM-dd'T'HH:mm:ss,SSSZ</str>
- <str>yyyy-MM-dd'T'HH:mm:ss.SSS</str>
- <str>yyyy-MM-dd'T'HH:mm:ss,SSS</str>
- <str>yyyy-MM-dd'T'HH:mm:ssZ</str>
- <str>yyyy-MM-dd'T'HH:mm:ss</str>
- <str>yyyy-MM-dd'T'HH:mmZ</str>
- <str>yyyy-MM-dd'T'HH:mm</str>
- <str>yyyy-MM-dd HH:mm:ss.SSSZ</str>
- <str>yyyy-MM-dd HH:mm:ss,SSSZ</str>
- <str>yyyy-MM-dd HH:mm:ss.SSS</str>
- <str>yyyy-MM-dd HH:mm:ss,SSS</str>
- <str>yyyy-MM-dd HH:mm:ssZ</str>
- <str>yyyy-MM-dd HH:mm:ss</str>
- <str>yyyy-MM-dd HH:mmZ</str>
- <str>yyyy-MM-dd HH:mm</str>
- <str>yyyy-MM-dd</str>
- </arr>
- </processor>
- <processor class="solr.AddSchemaFieldsUpdateProcessorFactory">
- <str name="defaultFieldType">text_general</str>
- <lst name="typeMapping">
- <str name="valueClass">java.lang.Boolean</str>
- <str name="fieldType">booleans</str>
- </lst>
- <lst name="typeMapping">
- <str name="valueClass">java.util.Date</str>
- <str name="fieldType">tdates</str>
- </lst>
- <lst name="typeMapping">
- <str name="valueClass">java.lang.Long</str>
- <str name="valueClass">java.lang.Integer</str>
- <str name="fieldType">tlongs</str>
- </lst>
- <lst name="typeMapping">
- <str name="valueClass">java.lang.Number</str>
- <str name="fieldType">tdoubles</str>
- </lst>
- </processor>
- <processor class="solr.LogUpdateProcessorFactory"/>
- <processor class="solr.RunUpdateProcessorFactory"/>
- </updateRequestProcessorChain>
-
- <!-- Deduplication
-
- An example dedup update processor that creates the "id" field
- on the fly based on the hash code of some other fields. This
- example has overwriteDupes set to false since we are using the
- id field as the signatureField and Solr will maintain
- uniqueness based on that anyway.
-
- -->
- <!--
- <updateRequestProcessorChain name="dedupe">
- <processor class="solr.processor.SignatureUpdateProcessorFactory">
- <bool name="enabled">true</bool>
- <str name="signatureField">id</str>
- <bool name="overwriteDupes">false</bool>
- <str name="fields">name,features,cat</str>
- <str name="signatureClass">solr.processor.Lookup3Signature</str>
- </processor>
- <processor class="solr.LogUpdateProcessorFactory" />
- <processor class="solr.RunUpdateProcessorFactory" />
- </updateRequestProcessorChain>
- -->
-
- <!-- Language identification
-
- This example update chain identifies the language of the incoming
- documents using the langid contrib. The detected language is
- written to field language_s. No field name mapping is done.
- The fields used for detection are text, title, subject and description,
- making this example suitable for detecting languages form full-text
- rich documents injected via ExtractingRequestHandler.
- See more about langId at http://wiki.apache.org/solr/LanguageDetection
- -->
- <!--
- <updateRequestProcessorChain name="langid">
- <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
- <str name="langid.fl">text,title,subject,description</str>
- <str name="langid.langField">language_s</str>
- <str name="langid.fallback">en</str>
- </processor>
- <processor class="solr.LogUpdateProcessorFactory" />
- <processor class="solr.RunUpdateProcessorFactory" />
- </updateRequestProcessorChain>
- -->
-
- <!-- Script update processor
-
- This example hooks in an update processor implemented using JavaScript.
-
- See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor
- -->
- <!--
- <updateRequestProcessorChain name="script">
- <processor class="solr.StatelessScriptUpdateProcessorFactory">
- <str name="script">update-script.js</str>
- <lst name="params">
- <str name="config_param">example config parameter</str>
- </lst>
- </processor>
- <processor class="solr.RunUpdateProcessorFactory" />
- </updateRequestProcessorChain>
- -->
-
- <!-- Response Writers
-
- http://wiki.apache.org/solr/QueryResponseWriter
-
- Request responses will be written using the writer specified by
- the 'wt' request parameter matching the name of a registered
- writer.
-
- The "default" writer is the default and will be used if 'wt' is
- not specified in the request.
- -->
- <!-- The following response writers are implicitly configured unless
- overridden...
- -->
- <!--
- <queryResponseWriter name="xml"
- default="true"
- class="solr.XMLResponseWriter" />
- <queryResponseWriter name="json" class="solr.JSONResponseWriter"/>
- <queryResponseWriter name="python" class="solr.PythonResponseWriter"/>
- <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>
- <queryResponseWriter name="php" class="solr.PHPResponseWriter"/>
- <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>
- <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/>
- <queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/>
- -->
-
- <queryResponseWriter name="json" class="solr.JSONResponseWriter">
- <!-- For the purposes of the tutorial, JSON responses are written as
- plain text so that they are easy to read in *any* browser.
- If you expect a MIME type of "application/json" just remove this override.
- -->
- <str name="content-type">text/plain; charset=UTF-8</str>
- </queryResponseWriter>
-
- <!--
- Custom response writers can be declared as needed...
- -->
- <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
-
-
- <!-- XSLT response writer transforms the XML output by any xslt file found
- in Solr's conf/xslt directory. Changes to xslt files are checked for
- every xsltCacheLifetimeSeconds.
- -->
- <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
- <int name="xsltCacheLifetimeSeconds">5</int>
- </queryResponseWriter>
-
- <!-- Query Parsers
-
- http://wiki.apache.org/solr/SolrQuerySyntax
-
- Multiple QParserPlugins can be registered by name, and then
- used in either the "defType" param for the QueryComponent (used
- by SearchHandler) or in LocalParams
- -->
- <!-- example of registering a query parser -->
- <!--
- <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/>
- -->
-
- <!-- Function Parsers
-
- http://wiki.apache.org/solr/FunctionQuery
-
- Multiple ValueSourceParsers can be registered by name, and then
- used as function names when using the "func" QParser.
- -->
- <!-- example of registering a custom function parser -->
- <!--
- <valueSourceParser name="myfunc"
- class="com.mycompany.MyValueSourceParser" />
- -->
-
-
- <!-- Document Transformers
- http://wiki.apache.org/solr/DocTransformers
- -->
- <!--
- Could be something like:
- <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" >
- <int name="connection">jdbc://....</int>
- </transformer>
-
- To add a constant value to all docs, use:
- <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
- <int name="value">5</int>
- </transformer>
-
- If you want the user to still be able to change it with _value:something_ use this:
- <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
- <double name="defaultValue">5</double>
- </transformer>
-
- If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The
- EditorialMarkerFactory will do exactly that:
- <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" />
- -->
-
-
- <!-- Legacy config for the admin interface -->
- <admin>
- <defaultQuery>*:*</defaultQuery>
- </admin>
-
-</config>
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+++ /dev/null
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-#some test synonym mappings unlikely to appear in real input text
-aaafoo => aaabar
-bbbfoo => bbbfoo bbbbar
-cccfoo => cccbar cccbaz
-fooaaa,baraaa,bazaaa
-
-# Some synonym groups specific to this example
-GB,gib,gigabyte,gigabytes
-MB,mib,megabyte,megabytes
-Television, Televisions, TV, TVs
-#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
-#after us won't split it into two words.
-
-# Synonym mappings can be used for spelling correction too
-pixima => pixma
-
+++ /dev/null
-name=collection1
\ No newline at end of file
+++ /dev/null
-id,cat,name,price,inStock,author,series_t,sequence_i,genre_s
-0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,"A Song of Ice and Fire",1,fantasy
-0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,"A Song of Ice and Fire",2,fantasy
-055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,"A Song of Ice and Fire",3,fantasy
-0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi
-0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy
-0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi
-0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy
-0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy
-0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy
-080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy
+++ /dev/null
-[
- {
- "id" : "978-0641723445",
- "cat" : ["book","hardcover"],
- "name" : "The Lightning Thief",
- "author" : "Rick Riordan",
- "series_t" : "Percy Jackson and the Olympians",
- "sequence_i" : 1,
- "genre_s" : "fantasy",
- "inStock" : true,
- "price" : 12.50,
- "pages_i" : 384
- }
-,
- {
- "id" : "978-1423103349",
- "cat" : ["book","paperback"],
- "name" : "The Sea of Monsters",
- "author" : "Rick Riordan",
- "series_t" : "Percy Jackson and the Olympians",
- "sequence_i" : 2,
- "genre_s" : "fantasy",
- "inStock" : true,
- "price" : 6.49,
- "pages_i" : 304
- }
-,
- {
- "id" : "978-1857995879",
- "cat" : ["book","paperback"],
- "name" : "Sophie's World : The Greek Philosophers",
- "author" : "Jostein Gaarder",
- "sequence_i" : 1,
- "genre_s" : "fantasy",
- "inStock" : true,
- "price" : 3.07,
- "pages_i" : 64
- }
-,
- {
- "id" : "978-1933988177",
- "cat" : ["book","paperback"],
- "name" : "Lucene in Action, Second Edition",
- "author" : "Michael McCandless",
- "sequence_i" : 1,
- "genre_s" : "IT",
- "inStock" : true,
- "price" : 30.50,
- "pages_i" : 475
- }
-]
+++ /dev/null
-<?xml version="1.0" encoding="GB18030"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<add>
- <doc>
- <field name="id">GB18030TEST</field>
- <field name="name">Test with some GB18030 encoded characters</field>
- <field name="features">No accents here</field>
- <field name="features">ÕâÊÇÒ»¸ö¹¦ÄÜ</field>
- <field name="features">This is a feature (translated)</field>
- <field name="features">Õâ·ÝÎļþÊǺÜÓйâÔó</field>
- <field name="features">This document is very shiny (translated)</field>
- <field name="price">0</field>
- <field name="inStock">true</field>
- </doc>
-</add>
-
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<add>
-<doc>
- <field name="id">SP2514N</field>
- <field name="name">Samsung SpinPoint P120 SP2514N - hard drive - 250 GB - ATA-133</field>
- <field name="manu">Samsung Electronics Co. Ltd.</field>
- <!-- Join -->
- <field name="manu_id_s">samsung</field>
- <field name="cat">electronics</field>
- <field name="cat">hard drive</field>
- <field name="features">7200RPM, 8MB cache, IDE Ultra ATA-133</field>
- <field name="features">NoiseGuard, SilentSeek technology, Fluid Dynamic Bearing (FDB) motor</field>
- <field name="price">92</field>
- <field name="popularity">6</field>
- <field name="inStock">true</field>
- <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
- <!-- Near Oklahoma city -->
- <field name="store">35.0752,-97.032</field>
-</doc>
-
-<doc>
- <field name="id">6H500F0</field>
- <field name="name">Maxtor DiamondMax 11 - hard drive - 500 GB - SATA-300</field>
- <field name="manu">Maxtor Corp.</field>
- <!-- Join -->
- <field name="manu_id_s">maxtor</field>
- <field name="cat">electronics</field>
- <field name="cat">hard drive</field>
- <field name="features">SATA 3.0Gb/s, NCQ</field>
- <field name="features">8.5ms seek</field>
- <field name="features">16MB cache</field>
- <field name="price">350</field>
- <field name="popularity">6</field>
- <field name="inStock">true</field>
- <!-- Buffalo store -->
- <field name="store">45.17614,-93.87341</field>
- <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
-</doc>
-</add>
-
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<add>
-
-<doc>
- <field name="id">F8V7067-APL-KIT</field>
- <field name="name">Belkin Mobile Power Cord for iPod w/ Dock</field>
- <field name="manu">Belkin</field>
- <!-- Join -->
- <field name="manu_id_s">belkin</field>
- <field name="cat">electronics</field>
- <field name="cat">connector</field>
- <field name="features">car power adapter, white</field>
- <field name="weight">4</field>
- <field name="price">19.95</field>
- <field name="popularity">1</field>
- <field name="inStock">false</field>
- <!-- Buffalo store -->
- <field name="store">45.18014,-93.87741</field>
- <field name="manufacturedate_dt">2005-08-01T16:30:25Z</field>
-</doc>
-
-<doc>
- <field name="id">IW-02</field>
- <field name="name">iPod & iPod Mini USB 2.0 Cable</field>
- <field name="manu">Belkin</field>
- <!-- Join -->
- <field name="manu_id_s">belkin</field>
- <field name="cat">electronics</field>
- <field name="cat">connector</field>
- <field name="features">car power adapter for iPod, white</field>
- <field name="weight">2</field>
- <field name="price">11.50</field>
- <field name="popularity">1</field>
- <field name="inStock">false</field>
- <!-- San Francisco store -->
- <field name="store">37.7752,-122.4232</field>
- <field name="manufacturedate_dt">2006-02-14T23:55:59Z</field>
-</doc>
-
-
-</add>
-
-
-
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<add><doc>
- <field name="id">MA147LL/A</field>
- <field name="name">Apple 60 GB iPod with Video Playback Black</field>
- <field name="manu">Apple Computer Inc.</field>
- <!-- Join -->
- <field name="manu_id_s">apple</field>
- <field name="cat">electronics</field>
- <field name="cat">music</field>
- <field name="features">iTunes, Podcasts, Audiobooks</field>
- <field name="features">Stores up to 15,000 songs, 25,000 photos, or 150 hours of video</field>
- <field name="features">2.5-inch, 320x240 color TFT LCD display with LED backlight</field>
- <field name="features">Up to 20 hours of battery life</field>
- <field name="features">Plays AAC, MP3, WAV, AIFF, Audible, Apple Lossless, H.264 video</field>
- <field name="features">Notes, Calendar, Phone book, Hold button, Date display, Photo wallet, Built-in games, JPEG photo playback, Upgradeable firmware, USB 2.0 compatibility, Playback speed control, Rechargeable capability, Battery level indication</field>
- <field name="includes">earbud headphones, USB cable</field>
- <field name="weight">5.5</field>
- <field name="price">399.00</field>
- <field name="popularity">10</field>
- <field name="inStock">true</field>
- <!-- Dodge City store -->
- <field name="store">37.7752,-100.0232</field>
- <field name="manufacturedate_dt">2005-10-12T08:00:00Z</field>
-</doc></add>
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<add>
- <doc>
- <field name="id">adata</field>
- <field name="compName_s">A-Data Technology</field>
- <field name="address_s">46221 Landing Parkway Fremont, CA 94538</field>
- </doc>
- <doc>
- <field name="id">apple</field>
- <field name="compName_s">Apple</field>
- <field name="address_s">1 Infinite Way, Cupertino CA</field>
- </doc>
- <doc>
- <field name="id">asus</field>
- <field name="compName_s">ASUS Computer</field>
- <field name="address_s">800 Corporate Way Fremont, CA 94539</field>
- </doc>
- <doc>
- <field name="id">ati</field>
- <field name="compName_s">ATI Technologies</field>
- <field name="address_s">33 Commerce Valley Drive East Thornhill, ON L3T 7N6 Canada</field>
- </doc>
- <doc>
- <field name="id">belkin</field>
- <field name="compName_s">Belkin</field>
- <field name="address_s">12045 E. Waterfront Drive Playa Vista, CA 90094</field>
- </doc>
- <doc>
- <field name="id">canon</field>
- <field name="compName_s">Canon, Inc.</field>
- <field name="address_s">One Canon Plaza Lake Success, NY 11042</field>
- </doc>
- <doc>
- <field name="id">corsair</field>
- <field name="compName_s">Corsair Microsystems</field>
- <field name="address_s">46221 Landing Parkway Fremont, CA 94538</field>
- </doc>
- <doc>
- <field name="id">dell</field>
- <field name="compName_s">Dell, Inc.</field>
- <field name="address_s">One Dell Way Round Rock, Texas 78682</field>
- </doc>
- <doc>
- <field name="id">maxtor</field>
- <field name="compName_s">Maxtor Corporation</field>
- <field name="address_s">920 Disc Drive Scotts Valley, CA 95066</field>
- </doc>
- <doc>
- <field name="id">samsung</field>
- <field name="compName_s">Samsung Electronics Co. Ltd.</field>
- <field name="address_s">105 Challenger Rd. Ridgefield Park, NJ 07660-0511</field>
- </doc>
- <doc>
- <field name="id">viewsonic</field>
- <field name="compName_s">ViewSonic Corp</field>
- <field name="address_s">381 Brea Canyon Road Walnut, CA 91789-0708</field>
- </doc>
-</add>
-
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<add>
-<doc>
- <field name="id">TWINX2048-3200PRO</field>
- <field name="name">CORSAIR XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) Dual Channel Kit System Memory - Retail</field>
- <field name="manu">Corsair Microsystems Inc.</field>
- <!-- Join -->
- <field name="manu_id_s">corsair</field>
- <field name="cat">electronics</field>
- <field name="cat">memory</field>
- <field name="features">CAS latency 2, 2-3-3-6 timing, 2.75v, unbuffered, heat-spreader</field>
- <field name="price">185</field>
- <field name="popularity">5</field>
- <field name="inStock">true</field>
- <!-- San Francisco store -->
- <field name="store">37.7752,-122.4232</field>
- <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
-
- <!-- a field for testing payload tagged text via DelimitedPayloadTokenFilter -->
- <field name="payloads">electronics|6.0 memory|3.0</field>
-</doc>
-
-<doc>
- <field name="id">VS1GB400C3</field>
- <field name="name">CORSAIR ValueSelect 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - Retail</field>
- <field name="manu">Corsair Microsystems Inc.</field>
- <!-- Join -->
- <field name="manu_id_s">corsair</field>
- <field name="cat">electronics</field>
- <field name="cat">memory</field>
- <field name="price">74.99</field>
- <field name="popularity">7</field>
- <field name="inStock">true</field>
- <!-- Dodge City store -->
- <field name="store">37.7752,-100.0232</field>
- <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
-
- <field name="payloads">electronics|4.0 memory|2.0</field>
-</doc>
-
-<doc>
- <field name="id">VDBDB1A16</field>
- <field name="name">A-DATA V-Series 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - OEM</field>
- <field name="manu">A-DATA Technology Inc.</field>
- <!-- Join -->
- <field name="manu_id_s">corsair</field>
- <field name="cat">electronics</field>
- <field name="cat">memory</field>
- <field name="features">CAS latency 3, 2.7v</field>
- <!-- note: price & popularity is missing on this one -->
- <field name="popularity">0</field>
- <field name="inStock">true</field>
- <!-- Buffalo store -->
- <field name="store">45.18414,-93.88141</field>
- <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
-
- <field name="payloads">electronics|0.9 memory|0.1</field>
-</doc>
-
-</add>
-
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- Example documents utilizing the CurrencyField type -->
-<add>
-<doc>
- <field name="id">USD</field>
- <field name="name">One Dollar</field>
- <field name="manu">Bank of America</field>
- <field name="manu_id_s">boa</field>
- <field name="cat">currency</field>
- <field name="features">Coins and notes</field>
- <field name="price_c">1,USD</field>
- <field name="inStock">true</field>
-</doc>
-
-<doc>
- <field name="id">EUR</field>
- <field name="name">One Euro</field>
- <field name="manu">European Union</field>
- <field name="manu_id_s">eu</field>
- <field name="cat">currency</field>
- <field name="features">Coins and notes</field>
- <field name="price_c">1,EUR</field>
- <field name="inStock">true</field>
-</doc>
-
-<doc>
- <field name="id">GBP</field>
- <field name="name">One British Pound</field>
- <field name="manu">U.K.</field>
- <field name="manu_id_s">uk</field>
- <field name="cat">currency</field>
- <field name="features">Coins and notes</field>
- <field name="price_c">1,GBP</field>
- <field name="inStock">true</field>
-</doc>
-
-<doc>
- <field name="id">NOK</field>
- <field name="name">One Krone</field>
- <field name="manu">Bank of Norway</field>
- <field name="manu_id_s">nor</field>
- <field name="cat">currency</field>
- <field name="features">Coins and notes</field>
- <field name="price_c">1,NOK</field>
- <field name="inStock">true</field>
-</doc>
-
-</add>
-
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<add><doc>
- <field name="id">3007WFP</field>
- <field name="name">Dell Widescreen UltraSharp 3007WFP</field>
- <field name="manu">Dell, Inc.</field>
- <!-- Join -->
- <field name="manu_id_s">dell</field>
- <field name="cat">electronics</field>
- <field name="cat">monitor</field>
- <field name="features">30" TFT active matrix LCD, 2560 x 1600, .25mm dot pitch, 700:1 contrast</field>
- <field name="includes">USB cable</field>
- <field name="weight">401.6</field>
- <field name="price">2199</field>
- <field name="popularity">6</field>
- <field name="inStock">true</field>
- <!-- Buffalo store -->
- <field name="store">43.17614,-90.57341</field>
-</doc></add>
-
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<add><doc>
- <field name="id">VA902B</field>
- <field name="name">ViewSonic VA902B - flat panel display - TFT - 19"</field>
- <field name="manu">ViewSonic Corp.</field>
- <!-- Join -->
- <field name="manu_id_s">viewsonic</field>
- <field name="cat">electronics</field>
- <field name="cat">monitor</field>
- <field name="features">19" TFT active matrix LCD, 8ms response time, 1280 x 1024 native resolution</field>
- <field name="weight">190.4</field>
- <field name="price">279.95</field>
- <field name="popularity">6</field>
- <field name="inStock">true</field>
- <!-- Buffalo store -->
- <field name="store">45.18814,-93.88541</field>
-</doc></add>
-
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<add><doc>
- <field name="id">0579B002</field>
- <field name="name">Canon PIXMA MP500 All-In-One Photo Printer</field>
- <field name="manu">Canon Inc.</field>
- <!-- Join -->
- <field name="manu_id_s">canon</field>
- <field name="cat">electronics</field>
- <field name="cat">multifunction printer</field>
- <field name="cat">printer</field>
- <field name="cat">scanner</field>
- <field name="cat">copier</field>
- <field name="features">Multifunction ink-jet color photo printer</field>
- <field name="features">Flatbed scanner, optical scan resolution of 1,200 x 2,400 dpi</field>
- <field name="features">2.5" color LCD preview screen</field>
- <field name="features">Duplex Copying</field>
- <field name="features">Printing speed up to 29ppm black, 19ppm color</field>
- <field name="features">Hi-Speed USB</field>
- <field name="features">memory card: CompactFlash, Micro Drive, SmartMedia, Memory Stick, Memory Stick Pro, SD Card, and MultiMediaCard</field>
- <field name="weight">352</field>
- <field name="price">179.99</field>
- <field name="popularity">6</field>
- <field name="inStock">true</field>
- <!-- Buffalo store -->
- <field name="store">45.19214,-93.89941</field>
-</doc></add>
-
+++ /dev/null
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-FILES=$*
-URL=http://localhost:8983/solr/update
-
-for f in $FILES; do
- echo Posting file $f to $URL
- curl $URL --data-binary @$f -H 'Content-type:application/xml'
- echo
-done
-
-#send the commit command to make sure all the changes are flushed and visible
-#curl $URL --data-binary '<commit softCommit=true/>' -H 'Content-type:application/xml'
-
-curl "$URL?softCommit=true"
-echo
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<add><doc>
- <field name="id">9885A004</field>
- <field name="name">Canon PowerShot SD500</field>
- <field name="manu">Canon Inc.</field>
- <!-- Join -->
- <field name="manu_id_s">canon</field>
- <field name="cat">electronics</field>
- <field name="cat">camera</field>
- <field name="features">3x zoop, 7.1 megapixel Digital ELPH</field>
- <field name="features">movie clips up to 640x480 @30 fps</field>
- <field name="features">2.0" TFT LCD, 118,000 pixels</field>
- <field name="features">built in flash, red-eye reduction</field>
- <field name="includes">32MB SD card, USB cable, AV cable, battery</field>
- <field name="weight">6.4</field>
- <field name="price">329.95</field>
- <field name="popularity">7</field>
- <field name="inStock">true</field>
- <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
- <!-- Buffalo store -->
- <field name="store">45.19614,-93.90341</field>
-</doc></add>
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<add>
-<doc>
- <field name="id">SOLR1000</field>
- <field name="name">Solr, the Enterprise Search Server</field>
- <field name="manu">Apache Software Foundation</field>
- <field name="cat">software</field>
- <field name="cat">search</field>
- <field name="features">Advanced Full-Text Search Capabilities using Lucene</field>
- <field name="features">Optimized for High Volume Web Traffic</field>
- <field name="features">Standards Based Open Interfaces - XML and HTTP</field>
- <field name="features">Comprehensive HTML Administration Interfaces</field>
- <field name="features">Scalability - Efficient Replication to other Solr Search Servers</field>
- <field name="features">Flexible and Adaptable with XML configuration and Schema</field>
- <field name="features">Good unicode support: héllo (hello with an accent over the e)</field>
- <field name="price">0</field>
- <field name="popularity">10</field>
- <field name="inStock">true</field>
- <field name="incubationdate_dt">2006-01-17T00:00:00.000Z</field>
-</doc>
-</add>
-
+++ /dev/null
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#Test script to tell if the server is accepting UTF-8
-#The python writer currently escapes non-ascii chars, so it's good for testing
-
-URL=http://localhost:8983/solr
-
-if [ ! -z $1 ]; then
- URL=$1
-fi
-
-curl "$URL/select?q=hello&params=explicit&wt=python" 2> /dev/null | grep 'hello' > /dev/null 2>&1
-if [ $? = 0 ]; then
- echo "Solr server is up."
-else
- echo "ERROR: Could not curl to Solr - is curl installed? Is Solr not running?"
- exit 1
-fi
-
-curl "$URL/select?q=h%C3%A9llo&echoParams=explicit&wt=python" 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1
-if [ $? = 0 ]; then
- echo "HTTP GET is accepting UTF-8"
-else
- echo "ERROR: HTTP GET is not accepting UTF-8"
-fi
-
-curl $URL/select --data-binary 'q=h%C3%A9llo&echoParams=explicit&wt=python' -H 'Content-type:application/x-www-form-urlencoded; charset=UTF-8' 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1
-if [ $? = 0 ]; then
- echo "HTTP POST is accepting UTF-8"
-else
- echo "ERROR: HTTP POST is not accepting UTF-8"
-fi
-
-curl $URL/select --data-binary 'q=h%C3%A9llo&echoParams=explicit&wt=python' 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1
-if [ $? = 0 ]; then
- echo "HTTP POST defaults to UTF-8"
-else
- echo "HTTP POST does not default to UTF-8"
-fi
-
-
-#A unicode character outside of the BMP (a circle with an x inside)
-CHAR="𐌈"
-CODEPOINT='0x10308'
-#URL encoded UTF8 of the codepoint
-URL_UTF8='%F0%90%8C%88'
-#expected return of the python writer (currently uses UTF-16 surrogates)
-EXPECTED='\\ud800\\udf08'
-
-curl "$URL/select?q=$URL_UTF8&echoParams=explicit&wt=python" 2> /dev/null | grep $EXPECTED > /dev/null 2>&1
-if [ $? = 0 ]; then
- echo "HTTP GET is accepting UTF-8 beyond the basic multilingual plane"
-else
- echo "ERROR: HTTP GET is not accepting UTF-8 beyond the basic multilingual plane"
-fi
-
-curl $URL/select --data-binary "q=$URL_UTF8&echoParams=explicit&wt=python" -H 'Content-type:application/x-www-form-urlencoded; charset=UTF-8' 2> /dev/null | grep $EXPECTED > /dev/null 2>&1
-if [ $? = 0 ]; then
- echo "HTTP POST is accepting UTF-8 beyond the basic multilingual plane"
-else
- echo "ERROR: HTTP POST is not accepting UTF-8 beyond the basic multilingual plane"
-fi
-
-curl "$URL/select?q=$URL_UTF8&echoParams=explicit&wt=python" --data-binary '' 2> /dev/null | grep $EXPECTED > /dev/null 2>&1
-if [ $? = 0 ]; then
- echo "HTTP POST + URL params is accepting UTF-8 beyond the basic multilingual plane"
-else
- echo "ERROR: HTTP POST + URL params is not accepting UTF-8 beyond the basic multilingual plane"
-fi
-
-#curl "$URL/select?q=$URL_UTF8&echoParams=explicit&wt=json" 2> /dev/null | od -tx1 -w1000 | sed 's/ //g' | grep 'f4808198' > /dev/null 2>&1
-curl "$URL/select?q=$URL_UTF8&echoParams=explicit&wt=json" 2> /dev/null | grep "$CHAR" > /dev/null 2>&1
-if [ $? = 0 ]; then
- echo "Response correctly returns UTF-8 beyond the basic multilingual plane"
-else
- echo "ERROR: Response can't return UTF-8 beyond the basic multilingual plane"
-fi
-
-
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!--
- After posting this to SOLR with post.sh, searching for "êâîôû" from
- the solr/admin/ search page must return this document.
- -->
-
-<add>
- <doc>
- <field name="id">UTF8TEST</field>
- <field name="name">Test with some UTF-8 encoded characters</field>
- <field name="manu">Apache Software Foundation</field>
- <field name="cat">software</field>
- <field name="cat">search</field>
- <field name="features">No accents here</field>
- <field name="features">This is an e acute: é</field>
- <field name="features">eaiou with circumflexes: êâîôû</field>
- <field name="features">eaiou with umlauts: ëäïöü</field>
- <field name="features">tag with escaped chars: &lt;nicetag/&gt;</field>
- <field name="features">escaped ampersand: Bonnie &amp; Clyde</field>
- <field name="features">Outside the BMP:𐌈 codepoint=10308, a circle with an x inside. UTF8=f0908c88 UTF16=d800 df08</field>
- <field name="price">0</field>
- <field name="inStock">true</field>
- </doc>
-</add>
-
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<add>
-<doc>
- <field name="id">EN7800GTX/2DHTV/256M</field>
- <field name="name">ASUS Extreme N7800GTX/2DHTV (256 MB)</field>
- <!-- Denormalized -->
- <field name="manu">ASUS Computer Inc.</field>
- <!-- Join -->
- <field name="manu_id_s">asus</field>
- <field name="cat">electronics</field>
- <field name="cat">graphics card</field>
- <field name="features">NVIDIA GeForce 7800 GTX GPU/VPU clocked at 486MHz</field>
- <field name="features">256MB GDDR3 Memory clocked at 1.35GHz</field>
- <field name="features">PCI Express x16</field>
- <field name="features">Dual DVI connectors, HDTV out, video input</field>
- <field name="features">OpenGL 2.0, DirectX 9.0</field>
- <field name="weight">16</field>
- <field name="price">479.95</field>
- <field name="popularity">7</field>
- <field name="store">40.7143,-74.006</field>
- <field name="inStock">false</field>
- <field name="manufacturedate_dt">2006-02-13T15:26:37Z/DAY</field>
-</doc>
- <!-- yes, you can add more than one document at a time -->
-<doc>
- <field name="id">100-435805</field>
- <field name="name">ATI Radeon X1900 XTX 512 MB PCIE Video Card</field>
- <field name="manu">ATI Technologies</field>
- <!-- Join -->
- <field name="manu_id_s">ati</field>
- <field name="cat">electronics</field>
- <field name="cat">graphics card</field>
- <field name="features">ATI RADEON X1900 GPU/VPU clocked at 650MHz</field>
- <field name="features">512MB GDDR3 SDRAM clocked at 1.55GHz</field>
- <field name="features">PCI Express x16</field>
- <field name="features">dual DVI, HDTV, svideo, composite out</field>
- <field name="features">OpenGL 2.0, DirectX 9.0</field>
- <field name="weight">48</field>
- <field name="price">649.99</field>
- <field name="popularity">7</field>
- <field name="inStock">false</field>
- <field name="manufacturedate_dt">2006-02-13T15:26:37Z/DAY</field>
- <!-- NYC store -->
- <field name="store">40.7143,-74.006</field>
-</doc>
-</add>
+++ /dev/null
-This is an alternative setup structure to support multiple cores.
-
-To run this configuration, start jetty in the example/ directory using:
-
-java -Dsolr.solr.home=multicore -jar start.jar
-
-For general examples on standard solr configuration, see the "solr" directory.
+++ /dev/null
-<?xml version="1.0" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<schema name="example core zero" version="1.1">
- <types>
- <fieldtype name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
- <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
- </types>
-
- <fields>
- <!-- general -->
- <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
- <field name="type" type="string" indexed="true" stored="true" multiValued="false" />
- <field name="name" type="string" indexed="true" stored="true" multiValued="false" />
- <field name="core0" type="string" indexed="true" stored="true" multiValued="false" />
- <field name="_version_" type="long" indexed="true" stored="true"/>
- </fields>
-
- <!-- field to use to determine and enforce document uniqueness. -->
- <uniqueKey>id</uniqueKey>
-
- <!-- field for the QueryParser to use when an explicit fieldname is absent -->
- <defaultSearchField>name</defaultSearchField>
-
- <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
- <solrQueryParser defaultOperator="OR"/>
-</schema>
-
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!--
- This is a stripped down config file used for a simple example...
- It is *not* a good example to work from.
--->
-<config>
- <luceneMatchVersion>4.4</luceneMatchVersion>
- <!-- The DirectoryFactory to use for indexes.
- solr.StandardDirectoryFactory, the default, is filesystem based.
- solr.RAMDirectoryFactory is memory based, not persistent, and doesn't work with replication. -->
- <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
-
- <dataDir>${solr.core0.data.dir:}</dataDir>
-
- <!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>:
-
- <schemaFactory class="ManagedIndexSchemaFactory">
- <bool name="mutable">true</bool>
- <str name="managedSchemaResourceName">managed-schema</str>
- </schemaFactory>
-
- When ManagedIndexSchemaFactory is specified, Solr will load the schema from
- the resource named in 'managedSchemaResourceName', rather than from schema.xml.
- Note that the managed schema resource CANNOT be named schema.xml. If the managed
- schema does not exist, Solr will create it after reading schema.xml, then rename
- 'schema.xml' to 'schema.xml.bak'.
-
- Do NOT hand edit the managed schema - external modifications will be ignored and
- overwritten as a result of schema modification REST API calls.
-
- When ManagedIndexSchemaFactory is specified with mutable = true, schema
- modification REST API calls will be allowed; otherwise, error responses will be
- sent back for these requests.
- -->
- <schemaFactory class="ClassicIndexSchemaFactory"/>
-
- <updateHandler class="solr.DirectUpdateHandler2">
- <updateLog>
- <str name="dir">${solr.core0.data.dir:}</str>
- </updateLog>
- </updateHandler>
-
- <!-- realtime get handler, guaranteed to return the latest stored fields
- of any document, without the need to commit or open a new searcher. The current
- implementation relies on the updateLog feature being enabled. -->
- <requestHandler name="/get" class="solr.RealTimeGetHandler">
- <lst name="defaults">
- <str name="omitHeader">true</str>
- </lst>
- </requestHandler>
-
- <requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" />
-
- <requestDispatcher handleSelect="true" >
- <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" formdataUploadLimitInKB="2048" />
- </requestDispatcher>
-
- <requestHandler name="standard" class="solr.StandardRequestHandler" default="true" />
- <requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" />
- <requestHandler name="/update" class="solr.UpdateRequestHandler" />
- <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
-
- <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
- <lst name="invariants">
- <str name="q">solrpingquery</str>
- </lst>
- <lst name="defaults">
- <str name="echoParams">all</str>
- </lst>
- </requestHandler>
-
- <!-- config for the admin interface -->
- <admin>
- <defaultQuery>solr</defaultQuery>
- </admin>
-
-</config>
-
+++ /dev/null
-<?xml version="1.0" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<schema name="example core one" version="1.1">
- <types>
- <fieldtype name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
- <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
- </types>
-
- <fields>
- <!-- general -->
- <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
- <field name="type" type="string" indexed="true" stored="true" multiValued="false" />
- <field name="name" type="string" indexed="true" stored="true" multiValued="false" />
- <field name="core1" type="string" indexed="true" stored="true" multiValued="false" />
- <field name="_version_" type="long" indexed="true" stored="true"/>
- </fields>
-
- <!-- field to use to determine and enforce document uniqueness. -->
- <uniqueKey>id</uniqueKey>
-
- <!-- field for the QueryParser to use when an explicit fieldname is absent -->
- <defaultSearchField>name</defaultSearchField>
-
- <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
- <solrQueryParser defaultOperator="OR"/>
-</schema>
-
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!--
- This is a stripped down config file used for a simple example...
- It is *not* a good example to work from.
--->
-<config>
- <luceneMatchVersion>4.4</luceneMatchVersion>
- <!-- The DirectoryFactory to use for indexes.
- solr.StandardDirectoryFactory, the default, is filesystem based.
- solr.RAMDirectoryFactory is memory based, not persistent, and doesn't work with replication. -->
- <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
-
- <dataDir>${solr.core1.data.dir:}</dataDir>
-
- <!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>:
-
- <schemaFactory class="ManagedIndexSchemaFactory">
- <bool name="mutable">true</bool>
- <str name="managedSchemaResourceName">managed-schema</str>
- </schemaFactory>
-
- When ManagedIndexSchemaFactory is specified, Solr will load the schema from
- the resource named in 'managedSchemaResourceName', rather than from schema.xml.
- Note that the managed schema resource CANNOT be named schema.xml. If the managed
- schema does not exist, Solr will create it after reading schema.xml, then rename
- 'schema.xml' to 'schema.xml.bak'.
-
- Do NOT hand edit the managed schema - external modifications will be ignored and
- overwritten as a result of schema modification REST API calls.
-
- When ManagedIndexSchemaFactory is specified with mutable = true, schema
- modification REST API calls will be allowed; otherwise, error responses will be
- sent back for these requests.
- -->
- <schemaFactory class="ClassicIndexSchemaFactory"/>
-
- <updateHandler class="solr.DirectUpdateHandler2">
- <updateLog>
- <str name="dir">${solr.core1.data.dir:}</str>
- </updateLog>
- </updateHandler>
-
- <!-- realtime get handler, guaranteed to return the latest stored fields
- of any document, without the need to commit or open a new searcher. The current
- implementation relies on the updateLog feature being enabled. -->
- <requestHandler name="/get" class="solr.RealTimeGetHandler">
- <lst name="defaults">
- <str name="omitHeader">true</str>
- </lst>
- </requestHandler>
-
- <requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" />
-
- <requestDispatcher handleSelect="true" >
- <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" formdataUploadLimitInKB="2048" />
- </requestDispatcher>
-
- <requestHandler name="standard" class="solr.StandardRequestHandler" default="true" />
- <requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" />
- <requestHandler name="/update" class="solr.UpdateRequestHandler" />
- <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
-
- <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
- <lst name="invariants">
- <str name="q">solrpingquery</str>
- </lst>
- <lst name="defaults">
- <str name="echoParams">all</str>
- </lst>
- </requestHandler>
-
- <!-- config for the admin interface -->
- <admin>
- <defaultQuery>solr</defaultQuery>
- </admin>
-
-</config>
-
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<add>
-
-<doc>
- <field name="id">F8V7067-APL-KIT</field>
- <field name="name">Belkin Mobile Power Cord for iPod w/ Dock</field>
-</doc>
-
-<doc>
- <field name="id">IW-02</field>
- <field name="name">iPod &amp; iPod Mini USB 2.0 Cable</field>
-</doc>
-
-
-</add>
-
-
-
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<add><doc>
- <field name="id">MA147LL/A</field>
- <field name="name">Apple 60 GB iPod with Video Playback Black</field>
-
-</doc></add>
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!--
- All (relative) paths are relative to the installation path
-
- persistent: Save changes made via the API to this file
- sharedLib: path to a lib directory that will be shared across all cores
--->
-<solr persistent="false">
-
- <!--
- adminPath: RequestHandler path to manage cores.
- If 'null' (or absent), cores will not be manageable via request handler
- -->
- <cores adminPath="/admin/cores" host="${host:}" hostPort="${jetty.port:8983}" hostContext="${hostContext:solr}">
- <core name="core0" instanceDir="core0" />
- <core name="core1" instanceDir="core1" />
- </cores>
-</solr>
+++ /dev/null
-# The number of milliseconds of each tick
-tickTime=2000
-# The number of ticks that the initial
-# synchronization phase can take
-initLimit=10
-# The number of ticks that can pass between
-# sending a request and getting an acknowledgement
-syncLimit=5
-
-# the directory where the snapshot is stored.
-# dataDir=/opt/zookeeper/data
-# NOTE: Solr defaults the dataDir to <solrHome>/zoo_data
-
-# the port at which the clients will connect
-# clientPort=2181
-# NOTE: Solr sets this based on zkRun / zkHost params
-
+++ /dev/null
-NAME="zk-lui-solr"
-HOST=localhost
-PID_FILE="./${NAME}.pid"
-LOG_FILE="./${NAME}.log"
-PORT=8983
-#ZKHOSTS=opencontent-solr.index:9983
-ZKRUN=yes
-BOOTSTRAP_CONF=./solr/collection1/conf
+++ /dev/null
-OPTIONS="-Djetty.port=7500 -DzkHost=opencontent-solr.index:9983"
-NAME=second
-PID_FILE="./${NAME}.pid"
-LOG_FILE="./${NAME}.log"
+++ /dev/null
-NAME="zk-lui-solr"
-HOST=localhost
-PID_FILE="./${NAME}.pid"
-LOG_FILE="./${NAME}.log"
-PORT=8983
-ZKRUN=yes
-BOOTSTRAP_CONF=./solr/collection1/conf
+++ /dev/null
-NAME="zk-lui-solr-second"
-HOST=localhost
-PID_FILE="./${NAME}.pid"
-LOG_FILE="./${NAME}.log"
-PORT=8984
-ZKHOSTS=localhost:9983
-#BOOTSTRAP_CONF=./solr/collection1/conf
-ZOOKEEPER=no
+++ /dev/null
-# Logging level
-solr.log=logs/
-log4j.rootLogger=INFO, file, CONSOLE
-
-log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
-
-log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
-log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x \u2013 %m%n
-
-#- size rotation with log cleanup.
-log4j.appender.file=org.apache.log4j.RollingFileAppender
-log4j.appender.file.MaxFileSize=4MB
-log4j.appender.file.MaxBackupIndex=9
-
-#- File to log to and log format
-log4j.appender.file.File=${solr.log}/solr.log
-log4j.appender.file.layout=org.apache.log4j.PatternLayout
-log4j.appender.file.layout.ConversionPattern=%-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %C; %m\n
-
-log4j.logger.org.apache.zookeeper=WARN
-log4j.logger.org.apache.hadoop=WARN
-
-# set to INFO to enable infostream log messages
-log4j.logger.org.apache.solr.update.LoggingInfoStream=OFF
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-Example Solr Home Directory
-=============================
-
-This directory is provided as an example of what a "Solr Home" directory
-should look like.
-
-It's not strictly necessary that you copy all of the files in this
-directory when setting up a new instance of Solr, but it is recommended.
-
-
-Basic Directory Structure
--------------------------
-
-The Solr Home directory typically contains the following...
-
-* solr.xml *
-
-This is the primary configuration file Solr looks for when starting.
-This file specifies the list of "SolrCores" it should load, and high
-level configuration options that should be used for all SolrCores.
-
-Please see the comments in ./solr.xml for more details.
-
-If no solr.xml file is found, then Solr assumes that there should be
-a single SolrCore named "collection1" and that the "Instance Directory"
-for collection1 should be the same as the Solr Home Directory.
-
-* Individual SolrCore Instance Directories *
-
-Although solr.xml can be configured to look for SolrCore Instance Directories
-in any path, simple sub-directories of the Solr Home Dir using relative paths
-are common for many installations. In this directory you can see the
-"./collection1" Instance Directory.
-
-* A Shared 'lib' Directory *
-
-Although solr.xml can be configured with an optional "sharedLib" attribute
-that can point to any path, it is common to use a "./lib" sub-directory of the
-Solr Home Directory.
-
-* ZooKeeper Files *
-
-When using SolrCloud using the embedded ZooKeeper option for Solr, it is
-common to have a "zoo.cfg" file and "zoo_data" directories in the Solr Home
-Directory. Please see the SolrCloud wiki page for more details...
-
-https://wiki.apache.org/solr/SolrCloud
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-Example SolrCore Instance Directory
-=============================
-
-This directory is provided as an example of what an "Instance Directory"
-should look like for a SolrCore
-
-It's not strictly necessary that you copy all of the files in this
-directory when setting up a new SolrCores, but it is recommended.
-
-
-Basic Directory Structure
--------------------------
-
-The Solr Home directory typically contains the following sub-directories...
-
- conf/
- This directory is mandatory and must contain your solrconfig.xml
- and schema.xml. Any other optional configuration files would also
- be kept here.
-
- data/
- This directory is the default location where Solr will keep your
- index, and is used by the replication scripts for dealing with
- snapshots. You can override this location in the
- conf/solrconfig.xml. Solr will create this directory if it does not
- already exist.
-
- lib/
- This directory is optional. If it exists, Solr will load any Jars
- found in this directory and use them to resolve any "plugins"
- specified in your solrconfig.xml or schema.xml (ie: Analyzers,
- Request Handlers, etc...). Alternatively you can use the <lib>
- syntax in conf/solrconfig.xml to direct Solr to your plugins. See
- the example conf/solrconfig.xml file for details.
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- The content of this page will be statically included into the top-
-right box of the cores overview page. Uncomment this as an example to
-see there the content will show up.
-
-<img src="img/ico/construction.png"> This line will appear at the top-
-right box on collection1's Overview
--->
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- admin-extra.menu-bottom.html -->
-<!--
-<li>
- <a href="#" style="background-image: url(img/ico/construction.png);">
- LAST ITEM
- </a>
-</li>
--->
+++ /dev/null
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- admin-extra.menu-top.html -->
-<!--
-<li>
- <a href="#" style="background-image: url(img/ico/construction.png);">
- FIRST ITEM
- </a>
-</li>
--->
+++ /dev/null
-<?xml version="1.0" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- Example exchange rates file for CurrencyField type named "currency" in example schema -->
-
-<currencyConfig version="1.0">
- <rates>
- <!-- Updated from http://www.exchangerate.com/ at 2011-09-27 -->
- <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" />
- <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" />
- <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" />
- <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" />
- <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" />
- <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" />
- <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" />
- <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" />
- <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" />
- <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" />
- <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" />
- <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" />
- <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" />
- <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" />
- <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" />
- <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" />
- <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" />
- <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" />
- <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" />
- <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" />
- <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" />
- <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" />
- <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" />
- <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" />
- <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" />
- <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" />
- <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" />
- <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" />
- <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" />
- <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" />
- <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" />
- <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" />
- <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" />
- <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" />
- <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" />
- <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" />
- <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" />
-
- <!-- Cross-rates for some common currencies -->
- <rate from="EUR" to="GBP" rate="0.869914" />
- <rate from="EUR" to="NOK" rate="7.800095" />
- <rate from="GBP" to="NOK" rate="8.966508" />
- </rates>
-</currencyConfig>
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- If this file is found in the config directory, it will only be
- loaded once at startup. If it is found in Solr's data
- directory, it will be re-loaded every commit.
-
- See http://wiki.apache.org/solr/QueryElevationComponent for more info
-
--->
-<elevate>
- <query text="foo bar">
- <doc id="1" />
- <doc id="2" />
- <doc id="3" />
- </query>
-
- <query text="ipod">
- <doc id="MA147LL/A" /> <!-- put the actual ipod at the top -->
- <doc id="IW-02" exclude="true" /> <!-- exclude this cable -->
- </query>
-
-</elevate>
+++ /dev/null
-# Set of Catalan contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-l
-m
-n
-s
-t
+++ /dev/null
-# Set of French contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-l
-m
-t
-qu
-n
-s
-j
-d
-c
-jusqu
-quoiqu
-lorsqu
-puisqu
+++ /dev/null
-# Set of Irish contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-m
-b
+++ /dev/null
-# Set of Italian contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-c
-l
-all
-dall
-dell
-nell
-sull
-coll
-pell
-gl
-agl
-dagl
-degl
-negl
-sugl
-un
-m
-t
-s
-v
-d
+++ /dev/null
-# Set of Irish hyphenations for StopFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-h
-n
-t
+++ /dev/null
-# Set of overrides for the dutch stemmer
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-fiets fiets
-bromfiets bromfiets
-ei eier
-kind kinder
+++ /dev/null
-#
-# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
-#
-# Any token with a part-of-speech tag that exactly matches those defined in this
-# file are removed from the token stream.
-#
-# Set your own stoptags by uncommenting the lines below. Note that comments are
-# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
-# etc. that can be useful for building you own stoptag set.
-#
-# The entire possible tagset is provided below for convenience.
-#
-#####
-# noun: unclassified nouns
-#名詞
-#
-# noun-common: Common nouns or nouns where the sub-classification is undefined
-#名詞-一般
-#
-# noun-proper: Proper nouns where the sub-classification is undefined
-#名詞-固有名詞
-#
-# noun-proper-misc: miscellaneous proper nouns
-#名詞-固有名詞-一般
-#
-# noun-proper-person: Personal names where the sub-classification is undefined
-#名詞-固有名詞-人名
-#
-# noun-proper-person-misc: names that cannot be divided into surname and
-# given name; foreign names; names where the surname or given name is unknown.
-# e.g. お市の方
-#名詞-固有名詞-人名-一般
-#
-# noun-proper-person-surname: Mainly Japanese surnames.
-# e.g. 山田
-#名詞-固有名詞-人名-姓
-#
-# noun-proper-person-given_name: Mainly Japanese given names.
-# e.g. 太郎
-#名詞-固有名詞-人名-名
-#
-# noun-proper-organization: Names representing organizations.
-# e.g. 通産省, NHK
-#名詞-固有名詞-組織
-#
-# noun-proper-place: Place names where the sub-classification is undefined
-#名詞-固有名詞-地域
-#
-# noun-proper-place-misc: Place names excluding countries.
-# e.g. アジア, バルセロナ, 京都
-#名詞-固有名詞-地域-一般
-#
-# noun-proper-place-country: Country names.
-# e.g. 日本, オーストラリア
-#名詞-固有名詞-地域-国
-#
-# noun-pronoun: Pronouns where the sub-classification is undefined
-#名詞-代名詞
-#
-# noun-pronoun-misc: miscellaneous pronouns:
-# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
-#名詞-代名詞-一般
-#
-# noun-pronoun-contraction: Spoken language contraction made by combining a
-# pronoun and the particle 'wa'.
-# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ
-#名詞-代名詞-縮約
-#
-# noun-adverbial: Temporal nouns such as names of days or months that behave
-# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
-# e.g. 金曜, 一月, 午後, 少量
-#名詞-副詞可能
-#
-# noun-verbal: Nouns that take arguments with case and can appear followed by
-# 'suru' and related verbs (する, できる, なさる, くださる)
-# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
-#名詞-サ変接続
-#
-# noun-adjective-base: The base form of adjectives, words that appear before な ("na")
-# e.g. 健康, 安易, 駄目, だめ
-#名詞-形容動詞語幹
-#
-# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
-# e.g. 0, 1, 2, 何, 数, 幾
-#名詞-数
-#
-# noun-affix: noun affixes where the sub-classification is undefined
-#名詞-非自立
-#
-# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that
-# attach to the base form of inflectional words, words that cannot be classified
-# into any of the other categories below. This category includes indefinite nouns.
-# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第,
-# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み,
-# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
-# わり, 割り, 割, ん-口語/, もん-口語/
-#名詞-非自立-一般
-#
-# noun-affix-adverbial: noun affixes that that can behave as adverbs.
-# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ,
-# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか,
-# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所,
-# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま,
-# 儘, 侭, みぎり, 矢先
-#名詞-非自立-副詞可能
-#
-# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
-# with the stem よう(だ) ("you(da)").
-# e.g. よう, やう, 様 (よう)
-#名詞-非自立-助動詞語幹
-#
-# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
-# connection form な (aux "da").
-# e.g. みたい, ふう
-#名詞-非自立-形容動詞語幹
-#
-# noun-special: special nouns where the sub-classification is undefined.
-#名詞-特殊
-#
-# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is
-# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
-# form of inflectional words.
-# e.g. そう
-#名詞-特殊-助動詞語幹
-#
-# noun-suffix: noun suffixes where the sub-classification is undefined.
-#名詞-接尾
-#
-# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
-# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
-# any of the other categories below. In general, this category is more inclusive than
-# 接尾語 ("suffix") and is usually the last element in a compound noun.
-# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み,
-# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
-#名詞-接尾-一般
-#
-# noun-suffix-person: Suffixes that form nouns and attach to person names more often
-# than other nouns.
-# e.g. 君, 様, 著
-#名詞-接尾-人名
-#
-# noun-suffix-place: Suffixes that form nouns and attach to place names more often
-# than other nouns.
-# e.g. 町, 市, 県
-#名詞-接尾-地域
-#
-# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
-# can appear before スル ("suru").
-# e.g. 化, 視, 分け, 入り, 落ち, 買い
-#名詞-接尾-サ変接続
-#
-# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions,
-# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
-# conjunctive form of inflectional words.
-# e.g. そう
-#名詞-接尾-助動詞語幹
-#
-# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
-# form of inflectional words and appear before the copula だ ("da").
-# e.g. 的, げ, がち
-#名詞-接尾-形容動詞語幹
-#
-# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
-# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
-#名詞-接尾-副詞可能
-#
-# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
-# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
-# to numbers.
-# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
-#名詞-接尾-助数詞
-#
-# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
-# e.g. (楽し) さ, (考え) 方
-#名詞-接尾-特殊
-#
-# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
-# together.
-# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
-#名詞-接続詞的
-#
-# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are
-# semantically verb-like.
-# e.g. ごらん, ご覧, 御覧, 頂戴
-#名詞-動詞非自立的
-#
-# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
-# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation")
-# is いわく ("iwaku").
-#名詞-引用文字列
-#
-# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
-# behave like an adjective.
-# e.g. 申し訳, 仕方, とんでも, 違い
-#名詞-ナイ形容詞語幹
-#
-#####
-# prefix: unclassified prefixes
-#接頭詞
-#
-# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
-# excluding numerical expressions.
-# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派)
-#接頭詞-名詞接続
-#
-# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
-# in conjunctive form followed by なる/なさる/くださる.
-# e.g. お (読みなさい), お (座り)
-#接頭詞-動詞接続
-#
-# prefix-adjectival: Prefixes that attach to adjectives.
-# e.g. お (寒いですねえ), バカ (でかい)
-#接頭詞-形容詞接続
-#
-# prefix-numerical: Prefixes that attach to numerical expressions.
-# e.g. 約, およそ, 毎時
-#接頭詞-数接続
-#
-#####
-# verb: unclassified verbs
-#動詞
-#
-# verb-main:
-#動詞-自立
-#
-# verb-auxiliary:
-#動詞-非自立
-#
-# verb-suffix:
-#動詞-接尾
-#
-#####
-# adjective: unclassified adjectives
-#形容詞
-#
-# adjective-main:
-#形容詞-自立
-#
-# adjective-auxiliary:
-#形容詞-非自立
-#
-# adjective-suffix:
-#形容詞-接尾
-#
-#####
-# adverb: unclassified adverbs
-#副詞
-#
-# adverb-misc: Words that can be segmented into one unit and where adnominal
-# modification is not possible.
-# e.g. あいかわらず, 多分
-#副詞-一般
-#
-# adverb-particle_conjunction: Adverbs that can be followed by の, は, に,
-# な, する, だ, etc.
-# e.g. こんなに, そんなに, あんなに, なにか, なんでも
-#副詞-助詞類接続
-#
-#####
-# adnominal: Words that only have noun-modifying forms.
-# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう,
-# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした,
-# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
-#連体詞
-#
-#####
-# conjunction: Conjunctions that can occur independently.
-# e.g. が, けれども, そして, じゃあ, それどころか
-接続詞
-#
-#####
-# particle: unclassified particles.
-助詞
-#
-# particle-case: case particles where the subclassification is undefined.
-助詞-格助詞
-#
-# particle-case-misc: Case particles.
-# e.g. から, が, で, と, に, へ, より, を, の, にて
-助詞-格助詞-一般
-#
-# particle-case-quote: the "to" that appears after nouns, a person’s speech,
-# quotation marks, expressions of decisions from a meeting, reasons, judgements,
-# conjectures, etc.
-# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
-助詞-格助詞-引用
-#
-# particle-case-compound: Compounds of particles and verbs that mainly behave
-# like case particles.
-# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
-# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける,
-# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し,
-# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして,
-# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
-# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る,
-# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
-# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
-助詞-格助詞-連語
-#
-# particle-conjunctive:
-# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども,
-# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/,
-# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
-助詞-接続助詞
-#
-# particle-dependency:
-# e.g. こそ, さえ, しか, すら, は, も, ぞ
-助詞-係助詞
-#
-# particle-adverbial:
-# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/,
-# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
-# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに,
-# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
-# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
-助詞-副助詞
-#
-# particle-interjective: particles with interjective grammatical roles.
-# e.g. (松島) や
-助詞-間投助詞
-#
-# particle-coordinate:
-# e.g. と, たり, だの, だり, とか, なり, や, やら
-助詞-並立助詞
-#
-# particle-final:
-# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ,
-# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
-助詞-終助詞
-#
-# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
-# adverbial, conjunctive, or sentence final. For example:
-# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
-# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
-# 「(祈りが届いたせい) か (, 試験に合格した.)」
-# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
-# e.g. か
-助詞-副助詞/並立助詞/終助詞
-#
-# particle-adnominalizer: The "no" that attaches to nouns and modifies
-# non-inflectional words.
-助詞-連体化
-#
-# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs
-# that are giongo, giseigo, or gitaigo.
-# e.g. に, と
-助詞-副詞化
-#
-# particle-special: A particle that does not fit into one of the above classifications.
-# This includes particles that are used in Tanka, Haiku, and other poetry.
-# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
-助詞-特殊
-#
-#####
-# auxiliary-verb:
-助動詞
-#
-#####
-# interjection: Greetings and other exclamations.
-# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます,
-# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
-#感動詞
-#
-#####
-# symbol: unclassified Symbols.
-記号
-#
-# symbol-misc: A general symbol not in one of the categories below.
-# e.g. [○◎@$〒→+]
-記号-一般
-#
-# symbol-comma: Commas
-# e.g. [,、]
-記号-読点
-#
-# symbol-period: Periods and full stops.
-# e.g. [..。]
-記号-句点
-#
-# symbol-space: Full-width whitespace.
-記号-空白
-#
-# symbol-open_bracket:
-# e.g. [({‘“『【]
-記号-括弧開
-#
-# symbol-close_bracket:
-# e.g. [)}’”』」】]
-記号-括弧閉
-#
-# symbol-alphabetic:
-#記号-アルファベット
-#
-#####
-# other: unclassified other
-#その他
-#
-# other-interjection: Words that are hard to classify as noun-suffixes or
-# sentence-final particles.
-# e.g. (だ)ァ
-その他-間投
-#
-#####
-# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
-# e.g. あの, うんと, えと
-フィラー
-#
-#####
-# non-verbal: non-verbal sound.
-非言語音
-#
-#####
-# fragment:
-#語断片
-#
-#####
-# unknown: unknown part of speech.
-#未知語
-#
-##### End of file
+++ /dev/null
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Cleaned on October 11, 2009 (not normalized, so use before normalization)
-# This means that when modifying this list, you might need to add some
-# redundant entries, for example containing forms with both أ and ا
-من
-ومن
-منها
-منه
-في
-وفي
-فيها
-فيه
-و
-ف
-ثم
-او
-أو
-ب
-بها
-به
-ا
-أ
-اى
-اي
-أي
-أى
-لا
-ولا
-الا
-ألا
-إلا
-لكن
-ما
-وما
-كما
-فما
-عن
-مع
-اذا
-إذا
-ان
-أن
-إن
-انها
-أنها
-إنها
-انه
-أنه
-إنه
-بان
-بأن
-فان
-فأن
-وان
-وأن
-وإن
-التى
-التي
-الذى
-الذي
-الذين
-الى
-الي
-إلى
-إلي
-على
-عليها
-عليه
-اما
-أما
-إما
-ايضا
-أيضا
-كل
-وكل
-لم
-ولم
-لن
-ولن
-هى
-هي
-هو
-وهى
-وهي
-وهو
-فهى
-فهي
-فهو
-انت
-أنت
-لك
-لها
-له
-هذه
-هذا
-تلك
-ذلك
-هناك
-كانت
-كان
-يكون
-تكون
-وكانت
-وكان
-غير
-بعض
-قد
-نحو
-بين
-بينما
-منذ
-ضمن
-حيث
-الان
-الآن
-خلال
-بعد
-قبل
-حتى
-عند
-عندما
-لدى
-جميع
+++ /dev/null
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-а
-аз
-ако
-ала
-бе
-без
-беше
-би
-бил
-била
-били
-било
-близо
-бъдат
-бъде
-бяха
-в
-вас
-ваш
-ваша
-вероятно
-вече
-взема
-ви
-вие
-винаги
-все
-всеки
-всички
-всичко
-всяка
-във
-въпреки
-върху
-г
-ги
-главно
-го
-д
-да
-дали
-до
-докато
-докога
-дори
-досега
-доста
-е
-едва
-един
-ето
-за
-зад
-заедно
-заради
-засега
-затова
-защо
-защото
-и
-из
-или
-им
-има
-имат
-иска
-й
-каза
-как
-каква
-какво
-както
-какъв
-като
-кога
-когато
-което
-които
-кой
-който
-колко
-която
-къде
-където
-към
-ли
-м
-ме
-между
-мен
-ми
-мнозина
-мога
-могат
-може
-моля
-момента
-му
-н
-на
-над
-назад
-най
-направи
-напред
-например
-нас
-не
-него
-нея
-ни
-ние
-никой
-нито
-но
-някои
-някой
-няма
-обаче
-около
-освен
-особено
-от
-отгоре
-отново
-още
-пак
-по
-повече
-повечето
-под
-поне
-поради
-после
-почти
-прави
-пред
-преди
-през
-при
-пък
-първо
-с
-са
-само
-се
-сега
-си
-скоро
-след
-сме
-според
-сред
-срещу
-сте
-съм
-със
-също
-т
-тази
-така
-такива
-такъв
-там
-твой
-те
-тези
-ти
-тн
-то
-това
-тогава
-този
-той
-толкова
-точно
-трябва
-тук
-тъй
-тя
-тях
-у
-харесва
-ч
-че
-често
-чрез
-ще
-щом
-я
+++ /dev/null
-# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
-a
-abans
-ací
-ah
-així
-això
-al
-als
-aleshores
-algun
-alguna
-algunes
-alguns
-alhora
-allà
-allí
-allò
-altra
-altre
-altres
-amb
-ambdós
-ambdues
-apa
-aquell
-aquella
-aquelles
-aquells
-aquest
-aquesta
-aquestes
-aquests
-aquí
-baix
-cada
-cadascú
-cadascuna
-cadascunes
-cadascuns
-com
-contra
-d'un
-d'una
-d'unes
-d'uns
-dalt
-de
-del
-dels
-des
-després
-dins
-dintre
-donat
-doncs
-durant
-e
-eh
-el
-els
-em
-en
-encara
-ens
-entre
-érem
-eren
-éreu
-es
-és
-esta
-està
-estàvem
-estaven
-estàveu
-esteu
-et
-etc
-ets
-fins
-fora
-gairebé
-ha
-han
-has
-havia
-he
-hem
-heu
-hi
-ho
-i
-igual
-iguals
-ja
-l'hi
-la
-les
-li
-li'n
-llavors
-m'he
-ma
-mal
-malgrat
-mateix
-mateixa
-mateixes
-mateixos
-me
-mentre
-més
-meu
-meus
-meva
-meves
-molt
-molta
-moltes
-molts
-mon
-mons
-n'he
-n'hi
-ne
-ni
-no
-nogensmenys
-només
-nosaltres
-nostra
-nostre
-nostres
-o
-oh
-oi
-on
-pas
-pel
-pels
-per
-però
-perquè
-poc
-poca
-pocs
-poques
-potser
-propi
-qual
-quals
-quan
-quant
-que
-què
-quelcom
-qui
-quin
-quina
-quines
-quins
-s'ha
-s'han
-sa
-semblant
-semblants
-ses
-seu
-seus
-seva
-seva
-seves
-si
-sobre
-sobretot
-sóc
-solament
-sols
-son
-són
-sons
-sota
-sou
-t'ha
-t'han
-t'he
-ta
-tal
-també
-tampoc
-tan
-tant
-tanta
-tantes
-teu
-teus
-teva
-teves
-ton
-tons
-tot
-tota
-totes
-tots
-un
-una
-unes
-uns
-us
-va
-vaig
-vam
-van
-vas
-veu
-vosaltres
-vostra
-vostre
-vostres
+++ /dev/null
-a
-s
-k
-o
-i
-u
-v
-z
-dnes
-cz
-tímto
-budeš
-budem
-byli
-jseš
-můj
-svým
-ta
-tomto
-tohle
-tuto
-tyto
-jej
-zda
-proč
-máte
-tato
-kam
-tohoto
-kdo
-kteří
-mi
-nám
-tom
-tomuto
-mít
-nic
-proto
-kterou
-byla
-toho
-protože
-asi
-ho
-naši
-napište
-re
-což
-tím
-takže
-svých
-její
-svými
-jste
-aj
-tu
-tedy
-teto
-bylo
-kde
-ke
-pravé
-ji
-nad
-nejsou
-či
-pod
-téma
-mezi
-přes
-ty
-pak
-vám
-ani
-když
-však
-neg
-jsem
-tento
-článku
-články
-aby
-jsme
-před
-pta
-jejich
-byl
-ještě
-až
-bez
-také
-pouze
-první
-vaše
-která
-nás
-nový
-tipy
-pokud
-může
-strana
-jeho
-své
-jiné
-zprávy
-nové
-není
-vás
-jen
-podle
-zde
-už
-být
-více
-bude
-již
-než
-který
-by
-které
-co
-nebo
-ten
-tak
-má
-při
-od
-po
-jsou
-jak
-další
-ale
-si
-se
-ve
-to
-jako
-za
-zpět
-ze
-do
-pro
-je
-na
-atd
-atp
-jakmile
-přičemž
-já
-on
-ona
-ono
-oni
-ony
-my
-vy
-jí
-ji
-mě
-mne
-jemu
-tomu
-těm
-těmu
-němu
-němuž
-jehož
-jíž
-jelikož
-jež
-jakož
-načež
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Danish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
-
-og | and
-i | in
-jeg | I
-det | that (dem. pronoun)/it (pers. pronoun)
-at | that (in front of a sentence)/to (with infinitive)
-en | a/an
-den | it (pers. pronoun)/that (dem. pronoun)
-til | to/at/for/until/against/by/of/into, more
-er | present tense of "to be"
-som | who, as
-på | on/upon/in/on/at/to/after/of/with/for, on
-de | they
-med | with/by/in, along
-han | he
-af | of/by/from/off/for/in/with/on, off
-for | at/for/to/from/by/of/ago, in front/before, because
-ikke | not
-der | who/which, there/those
-var | past tense of "to be"
-mig | me/myself
-sig | oneself/himself/herself/itself/themselves
-men | but
-et | a/an/one, one (number), someone/somebody/one
-har | present tense of "to have"
-om | round/about/for/in/a, about/around/down, if
-vi | we
-min | my
-havde | past tense of "to have"
-ham | him
-hun | she
-nu | now
-over | over/above/across/by/beyond/past/on/about, over/past
-da | then, when/as/since
-fra | from/off/since, off, since
-du | you
-ud | out
-sin | his/her/its/one's
-dem | them
-os | us/ourselves
-op | up
-man | you/one
-hans | his
-hvor | where
-eller | or
-hvad | what
-skal | must/shall etc.
-selv | myself/youself/herself/ourselves etc., even
-her | here
-alle | all/everyone/everybody etc.
-vil | will (verb)
-blev | past tense of "to stay/to remain/to get/to become"
-kunne | could
-ind | in
-når | when
-være | present tense of "to be"
-dog | however/yet/after all
-noget | something
-ville | would
-jo | you know/you see (adv), yes
-deres | their/theirs
-efter | after/behind/according to/for/by/from, later/afterwards
-ned | down
-skulle | should
-denne | this
-end | than
-dette | this
-mit | my/mine
-også | also
-under | under/beneath/below/during, below/underneath
-have | have
-dig | you
-anden | other
-hende | her
-mine | my
-alt | everything
-meget | much/very, plenty of
-sit | his, her, its, one's
-sine | his, her, its, one's
-vor | our
-mod | against
-disse | these
-hvis | if
-din | your/yours
-nogle | some
-hos | by/at
-blive | be/become
-mange | many
-ad | by/through
-bliver | present tense of "to be/to become"
-hendes | her/hers
-været | be
-thi | for (conj)
-jer | you
-sådan | such, like this/like that
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A German stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | The number of forms in this list is reduced significantly by passing it
- | through the German stemmer.
-
-
-aber | but
-
-alle | all
-allem
-allen
-aller
-alles
-
-als | than, as
-also | so
-am | an + dem
-an | at
-
-ander | other
-andere
-anderem
-anderen
-anderer
-anderes
-anderm
-andern
-anderr
-anders
-
-auch | also
-auf | on
-aus | out of
-bei | by
-bin | am
-bis | until
-bist | art
-da | there
-damit | with it
-dann | then
-
-der | the
-den
-des
-dem
-die
-das
-
-daß | that
-
-derselbe | the same
-derselben
-denselben
-desselben
-demselben
-dieselbe
-dieselben
-dasselbe
-
-dazu | to that
-
-dein | thy
-deine
-deinem
-deinen
-deiner
-deines
-
-denn | because
-
-derer | of those
-dessen | of him
-
-dich | thee
-dir | to thee
-du | thou
-
-dies | this
-diese
-diesem
-diesen
-dieser
-dieses
-
-
-doch | (several meanings)
-dort | (over) there
-
-
-durch | through
-
-ein | a
-eine
-einem
-einen
-einer
-eines
-
-einig | some
-einige
-einigem
-einigen
-einiger
-einiges
-
-einmal | once
-
-er | he
-ihn | him
-ihm | to him
-
-es | it
-etwas | something
-
-euer | your
-eure
-eurem
-euren
-eurer
-eures
-
-für | for
-gegen | towards
-gewesen | p.p. of sein
-hab | have
-habe | have
-haben | have
-hat | has
-hatte | had
-hatten | had
-hier | here
-hin | there
-hinter | behind
-
-ich | I
-mich | me
-mir | to me
-
-
-ihr | you, to her
-ihre
-ihrem
-ihren
-ihrer
-ihres
-euch | to you
-
-im | in + dem
-in | in
-indem | while
-ins | in + das
-ist | is
-
-jede | each, every
-jedem
-jeden
-jeder
-jedes
-
-jene | that
-jenem
-jenen
-jener
-jenes
-
-jetzt | now
-kann | can
-
-kein | no
-keine
-keinem
-keinen
-keiner
-keines
-
-können | can
-könnte | could
-machen | do
-man | one
-
-manche | some, many a
-manchem
-manchen
-mancher
-manches
-
-mein | my
-meine
-meinem
-meinen
-meiner
-meines
-
-mit | with
-muss | must
-musste | had to
-nach | to(wards)
-nicht | not
-nichts | nothing
-noch | still, yet
-nun | now
-nur | only
-ob | whether
-oder | or
-ohne | without
-sehr | very
-
-sein | his
-seine
-seinem
-seinen
-seiner
-seines
-
-selbst | self
-sich | herself
-
-sie | they, she
-ihnen | to them
-
-sind | are
-so | so
-
-solche | such
-solchem
-solchen
-solcher
-solches
-
-soll | shall
-sollte | should
-sondern | but
-sonst | else
-über | over
-um | about, around
-und | and
-
-uns | us
-unse
-unsem
-unsen
-unser
-unses
-
-unter | under
-viel | much
-vom | von + dem
-von | from
-vor | before
-während | while
-war | was
-waren | were
-warst | wast
-was | what
-weg | away, off
-weil | because
-weiter | further
-
-welche | which
-welchem
-welchen
-welcher
-welches
-
-wenn | when
-werde | will
-werden | will
-wie | how
-wieder | again
-will | want
-wir | we
-wird | will
-wirst | willst
-wo | where
-wollen | want
-wollte | wanted
-würde | would
-würden | would
-zu | to
-zum | zu + dem
-zur | zu + der
-zwar | indeed
-zwischen | between
-
+++ /dev/null
-# Lucene Greek Stopwords list
-# Note: by default this file is used after GreekLowerCaseFilter,
-# so when modifying this file use 'σ' instead of 'ς'
-ο
-η
-το
-οι
-τα
-του
-τησ
-των
-τον
-την
-και
-κι
-κ
-ειμαι
-εισαι
-ειναι
-ειμαστε
-ειστε
-στο
-στον
-στη
-στην
-μα
-αλλα
-απο
-για
-προσ
-με
-σε
-ωσ
-παρα
-αντι
-κατα
-μετα
-θα
-να
-δε
-δεν
-μη
-μην
-επι
-ενω
-εαν
-αν
-τοτε
-που
-πωσ
-ποιοσ
-ποια
-ποιο
-ποιοι
-ποιεσ
-ποιων
-ποιουσ
-αυτοσ
-αυτη
-αυτο
-αυτοι
-αυτων
-αυτουσ
-αυτεσ
-αυτα
-εκεινοσ
-εκεινη
-εκεινο
-εκεινοι
-εκεινεσ
-εκεινα
-εκεινων
-εκεινουσ
-οπωσ
-ομωσ
-ισωσ
-οσο
-οτι
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# a couple of test stopwords to test that the words are really being
-# configured from this file:
-stopworda
-stopwordb
-
-# Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-such
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Spanish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | from, of
-la | the, her
-que | who, that
-el | the
-en | in
-y | and
-a | to
-los | the, them
-del | de + el
-se | himself, from him etc
-las | the, them
-por | for, by, etc
-un | a
-para | for
-con | with
-no | no
-una | a
-su | his, her
-al | a + el
- | es from SER
-lo | him
-como | how
-más | more
-pero | pero
-sus | su plural
-le | to him, her
-ya | already
-o | or
- | fue from SER
-este | this
- | ha from HABER
-sí | himself etc
-porque | because
-esta | this
- | son from SER
-entre | between
- | está from ESTAR
-cuando | when
-muy | very
-sin | without
-sobre | on
- | ser from SER
- | tiene from TENER
-también | also
-me | me
-hasta | until
-hay | there is/are
-donde | where
- | han from HABER
-quien | whom, that
- | están from ESTAR
- | estado from ESTAR
-desde | from
-todo | all
-nos | us
-durante | during
- | estados from ESTAR
-todos | all
-uno | a
-les | to them
-ni | nor
-contra | against
-otros | other
- | fueron from SER
-ese | that
-eso | that
- | había from HABER
-ante | before
-ellos | they
-e | and (variant of y)
-esto | this
-mí | me
-antes | before
-algunos | some
-qué | what?
-unos | a
-yo | I
-otro | other
-otras | other
-otra | other
-él | he
-tanto | so much, many
-esa | that
-estos | these
-mucho | much, many
-quienes | who
-nada | nothing
-muchos | many
-cual | who
- | sea from SER
-poco | few
-ella | she
-estar | to be
- | haber from HABER
-estas | these
- | estaba from ESTAR
- | estamos from ESTAR
-algunas | some
-algo | something
-nosotros | we
-
- | other forms
-
-mi | me
-mis | mi plural
-tú | thou
-te | thee
-ti | thee
-tu | thy
-tus | tu plural
-ellas | they
-nosotras | we
-vosotros | you
-vosotras | you
-os | you
-mío | mine
-mía |
-míos |
-mías |
-tuyo | thine
-tuya |
-tuyos |
-tuyas |
-suyo | his, hers, theirs
-suya |
-suyos |
-suyas |
-nuestro | ours
-nuestra |
-nuestros |
-nuestras |
-vuestro | yours
-vuestra |
-vuestros |
-vuestras |
-esos | those
-esas | those
-
- | forms of estar, to be (not including the infinitive):
-estoy
-estás
-está
-estamos
-estáis
-están
-esté
-estés
-estemos
-estéis
-estén
-estaré
-estarás
-estará
-estaremos
-estaréis
-estarán
-estaría
-estarías
-estaríamos
-estaríais
-estarían
-estaba
-estabas
-estábamos
-estabais
-estaban
-estuve
-estuviste
-estuvo
-estuvimos
-estuvisteis
-estuvieron
-estuviera
-estuvieras
-estuviéramos
-estuvierais
-estuvieran
-estuviese
-estuvieses
-estuviésemos
-estuvieseis
-estuviesen
-estando
-estado
-estada
-estados
-estadas
-estad
-
- | forms of haber, to have (not including the infinitive):
-he
-has
-ha
-hemos
-habéis
-han
-haya
-hayas
-hayamos
-hayáis
-hayan
-habré
-habrás
-habrá
-habremos
-habréis
-habrán
-habría
-habrías
-habríamos
-habríais
-habrían
-había
-habías
-habíamos
-habíais
-habían
-hube
-hubiste
-hubo
-hubimos
-hubisteis
-hubieron
-hubiera
-hubieras
-hubiéramos
-hubierais
-hubieran
-hubiese
-hubieses
-hubiésemos
-hubieseis
-hubiesen
-habiendo
-habido
-habida
-habidos
-habidas
-
- | forms of ser, to be (not including the infinitive):
-soy
-eres
-es
-somos
-sois
-son
-sea
-seas
-seamos
-seáis
-sean
-seré
-serás
-será
-seremos
-seréis
-serán
-sería
-serías
-seríamos
-seríais
-serían
-era
-eras
-éramos
-erais
-eran
-fui
-fuiste
-fue
-fuimos
-fuisteis
-fueron
-fuera
-fueras
-fuéramos
-fuerais
-fueran
-fuese
-fueses
-fuésemos
-fueseis
-fuesen
-siendo
-sido
- | sed also means 'thirst'
-
- | forms of tener, to have (not including the infinitive):
-tengo
-tienes
-tiene
-tenemos
-tenéis
-tienen
-tenga
-tengas
-tengamos
-tengáis
-tengan
-tendré
-tendrás
-tendrá
-tendremos
-tendréis
-tendrán
-tendría
-tendrías
-tendríamos
-tendríais
-tendrían
-tenía
-tenías
-teníamos
-teníais
-tenían
-tuve
-tuviste
-tuvo
-tuvimos
-tuvisteis
-tuvieron
-tuviera
-tuvieras
-tuviéramos
-tuvierais
-tuvieran
-tuviese
-tuvieses
-tuviésemos
-tuvieseis
-tuviesen
-teniendo
-tenido
-tenida
-tenidos
-tenidas
-tened
-
+++ /dev/null
-# example set of basque stopwords
-al
-anitz
-arabera
-asko
-baina
-bat
-batean
-batek
-bati
-batzuei
-batzuek
-batzuetan
-batzuk
-bera
-beraiek
-berau
-berauek
-bere
-berori
-beroriek
-beste
-bezala
-da
-dago
-dira
-ditu
-du
-dute
-edo
-egin
-ere
-eta
-eurak
-ez
-gainera
-gu
-gutxi
-guzti
-haiei
-haiek
-haietan
-hainbeste
-hala
-han
-handik
-hango
-hara
-hari
-hark
-hartan
-hau
-hauei
-hauek
-hauetan
-hemen
-hemendik
-hemengo
-hi
-hona
-honek
-honela
-honetan
-honi
-hor
-hori
-horiei
-horiek
-horietan
-horko
-horra
-horrek
-horrela
-horretan
-horri
-hortik
-hura
-izan
-ni
-noiz
-nola
-non
-nondik
-nongo
-nor
-nora
-ze
-zein
-zen
-zenbait
-zenbat
-zer
-zergatik
-ziren
-zituen
-zu
-zuek
-zuen
-zuten
+++ /dev/null
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Note: by default this file is used after normalization, so when adding entries
-# to this file, use the arabic 'ي' instead of 'ی'
-انان
-نداشته
-سراسر
-خياه
-ايشان
-وي
-تاكنون
-بيشتري
-دوم
-پس
-ناشي
-وگو
-يا
-داشتند
-سپس
-هنگام
-هرگز
-پنج
-نشان
-امسال
-ديگر
-گروهي
-شدند
-چطور
-ده
-و
-دو
-نخستين
-ولي
-چرا
-چه
-وسط
-ه
-كدام
-قابل
-يك
-رفت
-هفت
-همچنين
-در
-هزار
-بله
-بلي
-شايد
-اما
-شناسي
-گرفته
-دهد
-داشته
-دانست
-داشتن
-خواهيم
-ميليارد
-وقتيكه
-امد
-خواهد
-جز
-اورده
-شده
-بلكه
-خدمات
-شدن
-برخي
-نبود
-بسياري
-جلوگيري
-حق
-كردند
-نوعي
-بعري
-نكرده
-نظير
-نبايد
-بوده
-بودن
-داد
-اورد
-هست
-جايي
-شود
-دنبال
-داده
-بايد
-سابق
-هيچ
-همان
-انجا
-كمتر
-كجاست
-گردد
-كسي
-تر
-مردم
-تان
-دادن
-بودند
-سري
-جدا
-ندارند
-مگر
-يكديگر
-دارد
-دهند
-بنابراين
-هنگامي
-سمت
-جا
-انچه
-خود
-دادند
-زياد
-دارند
-اثر
-بدون
-بهترين
-بيشتر
-البته
-به
-براساس
-بيرون
-كرد
-بعضي
-گرفت
-توي
-اي
-ميليون
-او
-جريان
-تول
-بر
-مانند
-برابر
-باشيم
-مدتي
-گويند
-اكنون
-تا
-تنها
-جديد
-چند
-بي
-نشده
-كردن
-كردم
-گويد
-كرده
-كنيم
-نمي
-نزد
-روي
-قصد
-فقط
-بالاي
-ديگران
-اين
-ديروز
-توسط
-سوم
-ايم
-دانند
-سوي
-استفاده
-شما
-كنار
-داريم
-ساخته
-طور
-امده
-رفته
-نخست
-بيست
-نزديك
-طي
-كنيد
-از
-انها
-تمامي
-داشت
-يكي
-طريق
-اش
-چيست
-روب
-نمايد
-گفت
-چندين
-چيزي
-تواند
-ام
-ايا
-با
-ان
-ايد
-ترين
-اينكه
-ديگري
-راه
-هايي
-بروز
-همچنان
-پاعين
-كس
-حدود
-مختلف
-مقابل
-چيز
-گيرد
-ندارد
-ضد
-همچون
-سازي
-شان
-مورد
-باره
-مرسي
-خويش
-برخوردار
-چون
-خارج
-شش
-هنوز
-تحت
-ضمن
-هستيم
-گفته
-فكر
-بسيار
-پيش
-براي
-روزهاي
-انكه
-نخواهد
-بالا
-كل
-وقتي
-كي
-چنين
-كه
-گيري
-نيست
-است
-كجا
-كند
-نيز
-يابد
-بندي
-حتي
-توانند
-عقب
-خواست
-كنند
-بين
-تمام
-همه
-ما
-باشند
-مثل
-شد
-اري
-باشد
-اره
-طبق
-بعد
-اگر
-صورت
-غير
-جاي
-بيش
-ريزي
-اند
-زيرا
-چگونه
-بار
-لطفا
-مي
-درباره
-من
-ديده
-همين
-گذاري
-برداري
-علت
-گذاشته
-هم
-فوق
-نه
-ها
-شوند
-اباد
-همواره
-هر
-اول
-خواهند
-چهار
-نام
-امروز
-مان
-هاي
-قبل
-كنم
-سعي
-تازه
-را
-هستند
-زير
-جلوي
-عنوان
-بود
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| forms of BE
-
-olla
-olen
-olet
-on
-olemme
-olette
-ovat
-ole | negative form
-
-oli
-olisi
-olisit
-olisin
-olisimme
-olisitte
-olisivat
-olit
-olin
-olimme
-olitte
-olivat
-ollut
-olleet
-
-en | negation
-et
-ei
-emme
-ette
-eivät
-
-|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
-minä minun minut minua minussa minusta minuun minulla minulta minulle | I
-sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
-hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
-me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
-te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
-he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
-
-tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
-tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
-se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
-nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
-nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
-ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
-
-kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
-ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
-mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
-mitkä | (pl)
-
-joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
-jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
-
-| conjunctions
-
-että | that
-ja | and
-jos | if
-koska | because
-kuin | than
-mutta | but
-niin | so
-sekä | and
-sillä | for
-tai | or
-vaan | but
-vai | or
-vaikka | although
-
-
-| prepositions
-
-kanssa | with
-mukaan | according to
-noin | about
-poikki | across
-yli | over, across
-
-| other
-
-kun | when
-niin | so
-nyt | now
-itse | self
-
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A French stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-au | a + le
-aux | a + les
-avec | with
-ce | this
-ces | these
-dans | with
-de | of
-des | de + les
-du | de + le
-elle | she
-en | `of them' etc
-et | and
-eux | them
-il | he
-je | I
-la | the
-le | the
-leur | their
-lui | him
-ma | my (fem)
-mais | but
-me | me
-même | same; as in moi-même (myself) etc
-mes | me (pl)
-moi | me
-mon | my (masc)
-ne | not
-nos | our (pl)
-notre | our
-nous | we
-on | one
-ou | where
-par | by
-pas | not
-pour | for
-qu | que before vowel
-que | that
-qui | who
-sa | his, her (fem)
-se | oneself
-ses | his (pl)
-son | his, her (masc)
-sur | on
-ta | thy (fem)
-te | thee
-tes | thy (pl)
-toi | thee
-ton | thy (masc)
-tu | thou
-un | a
-une | a
-vos | your (pl)
-votre | your
-vous | you
-
- | single letter forms
-
-c | c'
-d | d'
-j | j'
-l | l'
-à | to, at
-m | m'
-n | n'
-s | s'
-t | t'
-y | there
-
- | forms of être (not including the infinitive):
-été
-étée
-étées
-étés
-étant
-suis
-es
-est
-sommes
-êtes
-sont
-serai
-seras
-sera
-serons
-serez
-seront
-serais
-serait
-serions
-seriez
-seraient
-étais
-était
-étions
-étiez
-étaient
-fus
-fut
-fûmes
-fûtes
-furent
-sois
-soit
-soyons
-soyez
-soient
-fusse
-fusses
-fût
-fussions
-fussiez
-fussent
-
- | forms of avoir (not including the infinitive):
-ayant
-eu
-eue
-eues
-eus
-ai
-as
-avons
-avez
-ont
-aurai
-auras
-aura
-aurons
-aurez
-auront
-aurais
-aurait
-aurions
-auriez
-auraient
-avais
-avait
-avions
-aviez
-avaient
-eut
-eûmes
-eûtes
-eurent
-aie
-aies
-ait
-ayons
-ayez
-aient
-eusse
-eusses
-eût
-eussions
-eussiez
-eussent
-
- | Later additions (from Jean-Christophe Deschamps)
-ceci | this
-cela | that
-celà | that
-cet | this
-cette | this
-ici | here
-ils | they
-les | the (pl)
-leurs | their (pl)
-quel | which
-quels | which
-quelle | which
-quelles | which
-sans | without
-soi | oneself
-
+++ /dev/null
-
-a
-ach
-ag
-agus
-an
-aon
-ar
-arna
-as
-b'
-ba
-beirt
-bhúr
-caoga
-ceathair
-ceathrar
-chomh
-chtó
-chuig
-chun
-cois
-céad
-cúig
-cúigear
-d'
-daichead
-dar
-de
-deich
-deichniúr
-den
-dhá
-do
-don
-dtí
-dá
-dár
-dó
-faoi
-faoin
-faoina
-faoinár
-fara
-fiche
-gach
-gan
-go
-gur
-haon
-hocht
-i
-iad
-idir
-in
-ina
-ins
-inár
-is
-le
-leis
-lena
-lenár
-m'
-mar
-mo
-mé
-na
-nach
-naoi
-naonúr
-ná
-ní
-níor
-nó
-nócha
-ocht
-ochtar
-os
-roimh
-sa
-seacht
-seachtar
-seachtó
-seasca
-seisear
-siad
-sibh
-sinn
-sna
-sé
-sí
-tar
-thar
-thú
-triúr
-trí
-trína
-trínár
-tríocha
-tú
-um
-ár
-é
-éis
-í
-ó
-ón
-óna
-ónár
+++ /dev/null
-# galican stopwords
-a
-aínda
-alí
-aquel
-aquela
-aquelas
-aqueles
-aquilo
-aquí
-ao
-aos
-as
-así
-á
-ben
-cando
-che
-co
-coa
-comigo
-con
-connosco
-contigo
-convosco
-coas
-cos
-cun
-cuns
-cunha
-cunhas
-da
-dalgunha
-dalgunhas
-dalgún
-dalgúns
-das
-de
-del
-dela
-delas
-deles
-desde
-deste
-do
-dos
-dun
-duns
-dunha
-dunhas
-e
-el
-ela
-elas
-eles
-en
-era
-eran
-esa
-esas
-ese
-eses
-esta
-estar
-estaba
-está
-están
-este
-estes
-estiven
-estou
-eu
-é
-facer
-foi
-foron
-fun
-había
-hai
-iso
-isto
-la
-las
-lle
-lles
-lo
-los
-mais
-me
-meu
-meus
-min
-miña
-miñas
-moi
-na
-nas
-neste
-nin
-no
-non
-nos
-nosa
-nosas
-noso
-nosos
-nós
-nun
-nunha
-nuns
-nunhas
-o
-os
-ou
-ó
-ós
-para
-pero
-pode
-pois
-pola
-polas
-polo
-polos
-por
-que
-se
-senón
-ser
-seu
-seus
-sexa
-sido
-sobre
-súa
-súas
-tamén
-tan
-te
-ten
-teñen
-teño
-ter
-teu
-teus
-ti
-tido
-tiña
-tiven
-túa
-túas
-un
-unha
-unhas
-uns
-vos
-vosa
-vosas
-voso
-vosos
-vós
+++ /dev/null
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# Note: by default this file also contains forms normalized by HindiNormalizer
-# for spelling variation (see section below), such that it can be used whether or
-# not you enable that feature. When adding additional entries to this list,
-# please add the normalized form as well.
-अंदर
-अत
-अपना
-अपनी
-अपने
-अभी
-आदि
-आप
-इत्यादि
-इन
-इनका
-इन्हीं
-इन्हें
-इन्हों
-इस
-इसका
-इसकी
-इसके
-इसमें
-इसी
-इसे
-उन
-उनका
-उनकी
-उनके
-उनको
-उन्हीं
-उन्हें
-उन्हों
-उस
-उसके
-उसी
-उसे
-एक
-एवं
-एस
-ऐसे
-और
-कई
-कर
-करता
-करते
-करना
-करने
-करें
-कहते
-कहा
-का
-काफ़ी
-कि
-कितना
-किन्हें
-किन्हों
-किया
-किर
-किस
-किसी
-किसे
-की
-कुछ
-कुल
-के
-को
-कोई
-कौन
-कौनसा
-गया
-घर
-जब
-जहाँ
-जा
-जितना
-जिन
-जिन्हें
-जिन्हों
-जिस
-जिसे
-जीधर
-जैसा
-जैसे
-जो
-तक
-तब
-तरह
-तिन
-तिन्हें
-तिन्हों
-तिस
-तिसे
-तो
-था
-थी
-थे
-दबारा
-दिया
-दुसरा
-दूसरे
-दो
-द्वारा
-न
-नहीं
-ना
-निहायत
-नीचे
-ने
-पर
-पर
-पहले
-पूरा
-पे
-फिर
-बनी
-बही
-बहुत
-बाद
-बाला
-बिलकुल
-भी
-भीतर
-मगर
-मानो
-मे
-में
-यदि
-यह
-यहाँ
-यही
-या
-यिह
-ये
-रखें
-रहा
-रहे
-ऱ्वासा
-लिए
-लिये
-लेकिन
-व
-वर्ग
-वह
-वह
-वहाँ
-वहीं
-वाले
-वुह
-वे
-वग़ैरह
-संग
-सकता
-सकते
-सबसे
-सभी
-साथ
-साबुत
-साभ
-सारा
-से
-सो
-ही
-हुआ
-हुई
-हुए
-है
-हैं
-हो
-होता
-होती
-होते
-होना
-होने
-# additional normalized forms of the above
-अपनि
-जेसे
-होति
-सभि
-तिंहों
-इंहों
-दवारा
-इसि
-किंहें
-थि
-उंहों
-ओर
-जिंहें
-वहिं
-अभि
-बनि
-हि
-उंहिं
-उंहें
-हें
-वगेरह
-एसे
-रवासा
-कोन
-निचे
-काफि
-उसि
-पुरा
-भितर
-हे
-बहि
-वहां
-कोइ
-यहां
-जिंहों
-तिंहें
-किसि
-कइ
-यहि
-इंहिं
-जिधर
-इंहें
-अदि
-इतयादि
-हुइ
-कोनसा
-इसकि
-दुसरे
-जहां
-अप
-किंहों
-उनकि
-भि
-वरग
-हुअ
-जेसा
-नहिं
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| Hungarian stop word list
-| prepared by Anna Tordai
-
-a
-ahogy
-ahol
-aki
-akik
-akkor
-alatt
-által
-általában
-amely
-amelyek
-amelyekben
-amelyeket
-amelyet
-amelynek
-ami
-amit
-amolyan
-amíg
-amikor
-át
-abban
-ahhoz
-annak
-arra
-arról
-az
-azok
-azon
-azt
-azzal
-azért
-aztán
-azután
-azonban
-bár
-be
-belül
-benne
-cikk
-cikkek
-cikkeket
-csak
-de
-e
-eddig
-egész
-egy
-egyes
-egyetlen
-egyéb
-egyik
-egyre
-ekkor
-el
-elég
-ellen
-elő
-először
-előtt
-első
-én
-éppen
-ebben
-ehhez
-emilyen
-ennek
-erre
-ez
-ezt
-ezek
-ezen
-ezzel
-ezért
-és
-fel
-felé
-hanem
-hiszen
-hogy
-hogyan
-igen
-így
-illetve
-ill.
-ill
-ilyen
-ilyenkor
-ison
-ismét
-itt
-jó
-jól
-jobban
-kell
-kellett
-keresztül
-keressünk
-ki
-kívül
-között
-közül
-legalább
-lehet
-lehetett
-legyen
-lenne
-lenni
-lesz
-lett
-maga
-magát
-majd
-majd
-már
-más
-másik
-meg
-még
-mellett
-mert
-mely
-melyek
-mi
-mit
-míg
-miért
-milyen
-mikor
-minden
-mindent
-mindenki
-mindig
-mint
-mintha
-mivel
-most
-nagy
-nagyobb
-nagyon
-ne
-néha
-nekem
-neki
-nem
-néhány
-nélkül
-nincs
-olyan
-ott
-össze
-ő
-ők
-őket
-pedig
-persze
-rá
-s
-saját
-sem
-semmi
-sok
-sokat
-sokkal
-számára
-szemben
-szerint
-szinte
-talán
-tehát
-teljes
-tovább
-továbbá
-több
-úgy
-ugyanis
-új
-újabb
-újra
-után
-utána
-utolsó
-vagy
-vagyis
-valaki
-valami
-valamint
-való
-vagyok
-van
-vannak
-volt
-voltam
-voltak
-voltunk
-vissza
-vele
-viszont
-volna
+++ /dev/null
-# example set of Armenian stopwords.
-այդ
-այլ
-այն
-այս
-դու
-դուք
-եմ
-են
-ենք
-ես
-եք
-է
-էի
-էին
-էինք
-էիր
-էիք
-էր
-ըստ
-թ
-ի
-ին
-իսկ
-իր
-կամ
-համար
-հետ
-հետո
-մենք
-մեջ
-մի
-ն
-նա
-նաև
-նրա
-նրանք
-որ
-որը
-որոնք
-որպես
-ու
-ում
-պիտի
-վրա
-և
+++ /dev/null
-# from appendix D of: A Study of Stemming Effects on Information
-# Retrieval in Bahasa Indonesia
-ada
-adanya
-adalah
-adapun
-agak
-agaknya
-agar
-akan
-akankah
-akhirnya
-aku
-akulah
-amat
-amatlah
-anda
-andalah
-antar
-diantaranya
-antara
-antaranya
-diantara
-apa
-apaan
-mengapa
-apabila
-apakah
-apalagi
-apatah
-atau
-ataukah
-ataupun
-bagai
-bagaikan
-sebagai
-sebagainya
-bagaimana
-bagaimanapun
-sebagaimana
-bagaimanakah
-bagi
-bahkan
-bahwa
-bahwasanya
-sebaliknya
-banyak
-sebanyak
-beberapa
-seberapa
-begini
-beginian
-beginikah
-beginilah
-sebegini
-begitu
-begitukah
-begitulah
-begitupun
-sebegitu
-belum
-belumlah
-sebelum
-sebelumnya
-sebenarnya
-berapa
-berapakah
-berapalah
-berapapun
-betulkah
-sebetulnya
-biasa
-biasanya
-bila
-bilakah
-bisa
-bisakah
-sebisanya
-boleh
-bolehkah
-bolehlah
-buat
-bukan
-bukankah
-bukanlah
-bukannya
-cuma
-percuma
-dahulu
-dalam
-dan
-dapat
-dari
-daripada
-dekat
-demi
-demikian
-demikianlah
-sedemikian
-dengan
-depan
-di
-dia
-dialah
-dini
-diri
-dirinya
-terdiri
-dong
-dulu
-enggak
-enggaknya
-entah
-entahlah
-terhadap
-terhadapnya
-hal
-hampir
-hanya
-hanyalah
-harus
-haruslah
-harusnya
-seharusnya
-hendak
-hendaklah
-hendaknya
-hingga
-sehingga
-ia
-ialah
-ibarat
-ingin
-inginkah
-inginkan
-ini
-inikah
-inilah
-itu
-itukah
-itulah
-jangan
-jangankan
-janganlah
-jika
-jikalau
-juga
-justru
-kala
-kalau
-kalaulah
-kalaupun
-kalian
-kami
-kamilah
-kamu
-kamulah
-kan
-kapan
-kapankah
-kapanpun
-dikarenakan
-karena
-karenanya
-ke
-kecil
-kemudian
-kenapa
-kepada
-kepadanya
-ketika
-seketika
-khususnya
-kini
-kinilah
-kiranya
-sekiranya
-kita
-kitalah
-kok
-lagi
-lagian
-selagi
-lah
-lain
-lainnya
-melainkan
-selaku
-lalu
-melalui
-terlalu
-lama
-lamanya
-selama
-selama
-selamanya
-lebih
-terlebih
-bermacam
-macam
-semacam
-maka
-makanya
-makin
-malah
-malahan
-mampu
-mampukah
-mana
-manakala
-manalagi
-masih
-masihkah
-semasih
-masing
-mau
-maupun
-semaunya
-memang
-mereka
-merekalah
-meski
-meskipun
-semula
-mungkin
-mungkinkah
-nah
-namun
-nanti
-nantinya
-nyaris
-oleh
-olehnya
-seorang
-seseorang
-pada
-padanya
-padahal
-paling
-sepanjang
-pantas
-sepantasnya
-sepantasnyalah
-para
-pasti
-pastilah
-per
-pernah
-pula
-pun
-merupakan
-rupanya
-serupa
-saat
-saatnya
-sesaat
-saja
-sajalah
-saling
-bersama
-sama
-sesama
-sambil
-sampai
-sana
-sangat
-sangatlah
-saya
-sayalah
-se
-sebab
-sebabnya
-sebuah
-tersebut
-tersebutlah
-sedang
-sedangkan
-sedikit
-sedikitnya
-segala
-segalanya
-segera
-sesegera
-sejak
-sejenak
-sekali
-sekalian
-sekalipun
-sesekali
-sekaligus
-sekarang
-sekarang
-sekitar
-sekitarnya
-sela
-selain
-selalu
-seluruh
-seluruhnya
-semakin
-sementara
-sempat
-semua
-semuanya
-sendiri
-sendirinya
-seolah
-seperti
-sepertinya
-sering
-seringnya
-serta
-siapa
-siapakah
-siapapun
-disini
-disinilah
-sini
-sinilah
-sesuatu
-sesuatunya
-suatu
-sesudah
-sesudahnya
-sudah
-sudahkah
-sudahlah
-supaya
-tadi
-tadinya
-tak
-tanpa
-setelah
-telah
-tentang
-tentu
-tentulah
-tentunya
-tertentu
-seterusnya
-tapi
-tetapi
-setiap
-tiap
-setidaknya
-tidak
-tidakkah
-tidaklah
-toh
-waduh
-wah
-wahai
-sewaktu
-walau
-walaupun
-wong
-yaitu
-yakni
-yang
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | An Italian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-ad | a (to) before vowel
-al | a + il
-allo | a + lo
-ai | a + i
-agli | a + gli
-all | a + l'
-agl | a + gl'
-alla | a + la
-alle | a + le
-con | with
-col | con + il
-coi | con + i (forms collo, cogli etc are now very rare)
-da | from
-dal | da + il
-dallo | da + lo
-dai | da + i
-dagli | da + gli
-dall | da + l'
-dagl | da + gll'
-dalla | da + la
-dalle | da + le
-di | of
-del | di + il
-dello | di + lo
-dei | di + i
-degli | di + gli
-dell | di + l'
-degl | di + gl'
-della | di + la
-delle | di + le
-in | in
-nel | in + el
-nello | in + lo
-nei | in + i
-negli | in + gli
-nell | in + l'
-negl | in + gl'
-nella | in + la
-nelle | in + le
-su | on
-sul | su + il
-sullo | su + lo
-sui | su + i
-sugli | su + gli
-sull | su + l'
-sugl | su + gl'
-sulla | su + la
-sulle | su + le
-per | through, by
-tra | among
-contro | against
-io | I
-tu | thou
-lui | he
-lei | she
-noi | we
-voi | you
-loro | they
-mio | my
-mia |
-miei |
-mie |
-tuo |
-tua |
-tuoi | thy
-tue |
-suo |
-sua |
-suoi | his, her
-sue |
-nostro | our
-nostra |
-nostri |
-nostre |
-vostro | your
-vostra |
-vostri |
-vostre |
-mi | me
-ti | thee
-ci | us, there
-vi | you, there
-lo | him, the
-la | her, the
-li | them
-le | them, the
-gli | to him, the
-ne | from there etc
-il | the
-un | a
-uno | a
-una | a
-ma | but
-ed | and
-se | if
-perché | why, because
-anche | also
-come | how
-dov | where (as dov')
-dove | where
-che | who, that
-chi | who
-cui | whom
-non | not
-più | more
-quale | who, that
-quanto | how much
-quanti |
-quanta |
-quante |
-quello | that
-quelli |
-quella |
-quelle |
-questo | this
-questi |
-questa |
-queste |
-si | yes
-tutto | all
-tutti | all
-
- | single letter forms:
-
-a | at
-c | as c' for ce or ci
-e | and
-i | the
-l | as l'
-o | or
-
- | forms of avere, to have (not including the infinitive):
-
-ho
-hai
-ha
-abbiamo
-avete
-hanno
-abbia
-abbiate
-abbiano
-avrò
-avrai
-avrà
-avremo
-avrete
-avranno
-avrei
-avresti
-avrebbe
-avremmo
-avreste
-avrebbero
-avevo
-avevi
-aveva
-avevamo
-avevate
-avevano
-ebbi
-avesti
-ebbe
-avemmo
-aveste
-ebbero
-avessi
-avesse
-avessimo
-avessero
-avendo
-avuto
-avuta
-avuti
-avute
-
- | forms of essere, to be (not including the infinitive):
-sono
-sei
-è
-siamo
-siete
-sia
-siate
-siano
-sarò
-sarai
-sarà
-saremo
-sarete
-saranno
-sarei
-saresti
-sarebbe
-saremmo
-sareste
-sarebbero
-ero
-eri
-era
-eravamo
-eravate
-erano
-fui
-fosti
-fu
-fummo
-foste
-furono
-fossi
-fosse
-fossimo
-fossero
-essendo
-
- | forms of fare, to do (not including the infinitive, fa, fat-):
-faccio
-fai
-facciamo
-fanno
-faccia
-facciate
-facciano
-farò
-farai
-farà
-faremo
-farete
-faranno
-farei
-faresti
-farebbe
-faremmo
-fareste
-farebbero
-facevo
-facevi
-faceva
-facevamo
-facevate
-facevano
-feci
-facesti
-fece
-facemmo
-faceste
-fecero
-facessi
-facesse
-facessimo
-facessero
-facendo
-
- | forms of stare, to be (not including the infinitive):
-sto
-stai
-sta
-stiamo
-stanno
-stia
-stiate
-stiano
-starò
-starai
-starà
-staremo
-starete
-staranno
-starei
-staresti
-starebbe
-staremmo
-stareste
-starebbero
-stavo
-stavi
-stava
-stavamo
-stavate
-stavano
-stetti
-stesti
-stette
-stemmo
-steste
-stettero
-stessi
-stesse
-stessimo
-stessero
-stando
+++ /dev/null
-#
-# This file defines a stopword set for Japanese.
-#
-# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
-# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
-# for frequency lists, etc. that can be useful for making your own set (if desired)
-#
-# Note that there is an overlap between these stopwords and the terms stopped when used
-# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
-# that comments are not allowed on the same line as stopwords.
-#
-# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
-# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
-# using the same character width as the entries in this file. Since this StopFilter is
-# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
-# entries to be in half-width and your kana entries to be in full-width.
-#
-の
-に
-は
-を
-た
-が
-で
-て
-と
-し
-れ
-さ
-ある
-いる
-も
-する
-から
-な
-こと
-として
-い
-や
-れる
-など
-なっ
-ない
-この
-ため
-その
-あっ
-よう
-また
-もの
-という
-あり
-まで
-られ
-なる
-へ
-か
-だ
-これ
-によって
-により
-おり
-より
-による
-ず
-なり
-られる
-において
-ば
-なかっ
-なく
-しかし
-について
-せ
-だっ
-その後
-できる
-それ
-う
-ので
-なお
-のみ
-でき
-き
-つ
-における
-および
-いう
-さらに
-でも
-ら
-たり
-その他
-に関する
-たち
-ます
-ん
-なら
-に対して
-特に
-せる
-及び
-これら
-とき
-では
-にて
-ほか
-ながら
-うち
-そして
-とともに
-ただし
-かつて
-それぞれ
-または
-お
-ほど
-ものの
-に対する
-ほとんど
-と共に
-といった
-です
-とも
-ところ
-ここ
-##### End of file
+++ /dev/null
-# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
-# the original list of over 800 forms was refined:
-# pronouns, adverbs, interjections were removed
-#
-# prepositions
-aiz
-ap
-ar
-apakš
-ārpus
-augšpus
-bez
-caur
-dēļ
-gar
-iekš
-iz
-kopš
-labad
-lejpus
-līdz
-no
-otrpus
-pa
-par
-pār
-pēc
-pie
-pirms
-pret
-priekš
-starp
-šaipus
-uz
-viņpus
-virs
-virspus
-zem
-apakšpus
-# Conjunctions
-un
-bet
-jo
-ja
-ka
-lai
-tomēr
-tikko
-turpretī
-arī
-kaut
-gan
-tādēļ
-tā
-ne
-tikvien
-vien
-kā
-ir
-te
-vai
-kamēr
-# Particles
-ar
-diezin
-droši
-diemžēl
-nebūt
-ik
-it
-taču
-nu
-pat
-tiklab
-iekšpus
-nedz
-tik
-nevis
-turpretim
-jeb
-iekam
-iekām
-iekāms
-kolīdz
-līdzko
-tiklīdz
-jebšu
-tālab
-tāpēc
-nekā
-itin
-jā
-jau
-jel
-nē
-nezin
-tad
-tikai
-vis
-tak
-iekams
-vien
-# modal verbs
-būt
-biju
-biji
-bija
-bijām
-bijāt
-esmu
-esi
-esam
-esat
-būšu
-būsi
-būs
-būsim
-būsiet
-tikt
-tiku
-tiki
-tika
-tikām
-tikāt
-tieku
-tiec
-tiek
-tiekam
-tiekat
-tikšu
-tiks
-tiksim
-tiksiet
-tapt
-tapi
-tapāt
-topat
-tapšu
-tapsi
-taps
-tapsim
-tapsiet
-kļūt
-kļuvu
-kļuvi
-kļuva
-kļuvām
-kļuvāt
-kļūstu
-kļūsti
-kļūst
-kļūstam
-kļūstat
-kļūšu
-kļūsi
-kļūs
-kļūsim
-kļūsiet
-# verbs
-varēt
-varēju
-varējām
-varēšu
-varēsim
-var
-varēji
-varējāt
-varēsi
-varēsiet
-varat
-varēja
-varēs
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Dutch stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large sample of Dutch text.
-
- | Dutch stop words frequently exhibit homonym clashes. These are indicated
- | clearly below.
-
-de | the
-en | and
-van | of, from
-ik | I, the ego
-te | (1) chez, at etc, (2) to, (3) too
-dat | that, which
-die | that, those, who, which
-in | in, inside
-een | a, an, one
-hij | he
-het | the, it
-niet | not, nothing, naught
-zijn | (1) to be, being, (2) his, one's, its
-is | is
-was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
-op | on, upon, at, in, up, used up
-aan | on, upon, to (as dative)
-met | with, by
-als | like, such as, when
-voor | (1) before, in front of, (2) furrow
-had | had, past tense all persons sing. of 'hebben' (have)
-er | there
-maar | but, only
-om | round, about, for etc
-hem | him
-dan | then
-zou | should/would, past tense all persons sing. of 'zullen'
-of | or, whether, if
-wat | what, something, anything
-mijn | possessive and noun 'mine'
-men | people, 'one'
-dit | this
-zo | so, thus, in this way
-door | through by
-over | over, across
-ze | she, her, they, them
-zich | oneself
-bij | (1) a bee, (2) by, near, at
-ook | also, too
-tot | till, until
-je | you
-mij | me
-uit | out of, from
-der | Old Dutch form of 'van der' still found in surnames
-daar | (1) there, (2) because
-haar | (1) her, their, them, (2) hair
-naar | (1) unpleasant, unwell etc, (2) towards, (3) as
-heb | present first person sing. of 'to have'
-hoe | how, why
-heeft | present third person sing. of 'to have'
-hebben | 'to have' and various parts thereof
-deze | this
-u | you
-want | (1) for, (2) mitten, (3) rigging
-nog | yet, still
-zal | 'shall', first and third person sing. of verb 'zullen' (will)
-me | me
-zij | she, they
-nu | now
-ge | 'thou', still used in Belgium and south Netherlands
-geen | none
-omdat | because
-iets | something, somewhat
-worden | to become, grow, get
-toch | yet, still
-al | all, every, each
-waren | (1) 'were' (2) to wander, (3) wares, (3)
-veel | much, many
-meer | (1) more, (2) lake
-doen | to do, to make
-toen | then, when
-moet | noun 'spot/mote' and present form of 'to must'
-ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
-zonder | without
-kan | noun 'can' and present form of 'to be able'
-hun | their, them
-dus | so, consequently
-alles | all, everything, anything
-onder | under, beneath
-ja | yes, of course
-eens | once, one day
-hier | here
-wie | who
-werd | imperfect third person sing. of 'become'
-altijd | always
-doch | yet, but etc
-wordt | present third person sing. of 'become'
-wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
-kunnen | to be able
-ons | us/our
-zelf | self
-tegen | against, towards, at
-na | after, near
-reeds | already
-wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
-kon | could; past tense of 'to be able'
-niets | nothing
-uw | your
-iemand | somebody
-geweest | been; past participle of 'be'
-andere | other
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Norwegian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This stop word list is for the dominant bokmål dialect. Words unique
- | to nynorsk are marked *.
-
- | Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005
-
-og | and
-i | in
-jeg | I
-det | it/this/that
-at | to (w. inf.)
-en | a/an
-et | a/an
-den | it/this/that
-til | to
-er | is/am/are
-som | who/that
-på | on
-de | they / you(formal)
-med | with
-han | he
-av | of
-ikke | not
-ikkje | not *
-der | there
-så | so
-var | was/were
-meg | me
-seg | you
-men | but
-ett | one
-har | have
-om | about
-vi | we
-min | my
-mitt | my
-ha | have
-hadde | had
-hun | she
-nå | now
-over | over
-da | when/as
-ved | by/know
-fra | from
-du | you
-ut | out
-sin | your
-dem | them
-oss | us
-opp | up
-man | you/one
-kan | can
-hans | his
-hvor | where
-eller | or
-hva | what
-skal | shall/must
-selv | self (reflective)
-sjøl | self (reflective)
-her | here
-alle | all
-vil | will
-bli | become
-ble | became
-blei | became *
-blitt | have become
-kunne | could
-inn | in
-når | when
-være | be
-kom | come
-noen | some
-noe | some
-ville | would
-dere | you
-som | who/which/that
-deres | their/theirs
-kun | only/just
-ja | yes
-etter | after
-ned | down
-skulle | should
-denne | this
-for | for/because
-deg | you
-si | hers/his
-sine | hers/his
-sitt | hers/his
-mot | against
-å | to
-meget | much
-hvorfor | why
-dette | this
-disse | these/those
-uten | without
-hvordan | how
-ingen | none
-din | your
-ditt | your
-blir | become
-samme | same
-hvilken | which
-hvilke | which (plural)
-sånn | such a
-inni | inside/within
-mellom | between
-vår | our
-hver | each
-hvem | who
-vors | us/ours
-hvis | whose
-både | both
-bare | only/just
-enn | than
-fordi | as/because
-før | before
-mange | many
-også | also
-slik | just
-vært | been
-være | to be
-båe | both *
-begge | both
-siden | since
-dykk | your *
-dykkar | yours *
-dei | they *
-deira | them *
-deires | theirs *
-deim | them *
-di | your (fem.) *
-då | as/when *
-eg | I *
-ein | a/an *
-eit | a/an *
-eitt | a/an *
-elles | or *
-honom | he *
-hjå | at *
-ho | she *
-hoe | she *
-henne | her
-hennar | her/hers
-hennes | hers
-hoss | how *
-hossen | how *
-ikkje | not *
-ingi | noone *
-inkje | noone *
-korleis | how *
-korso | how *
-kva | what/which *
-kvar | where *
-kvarhelst | where *
-kven | who/whom *
-kvi | why *
-kvifor | why *
-me | we *
-medan | while *
-mi | my *
-mine | my *
-mykje | much *
-no | now *
-nokon | some (masc./neut.) *
-noka | some (fem.) *
-nokor | some *
-noko | some *
-nokre | some *
-si | his/hers *
-sia | since *
-sidan | since *
-so | so *
-somt | some *
-somme | some *
-um | about*
-upp | up *
-vere | be *
-vore | was *
-verte | become *
-vort | become *
-varte | became *
-vart | became *
-
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Portuguese stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | of, from
-a | the; to, at; her
-o | the; him
-que | who, that
-e | and
-do | de + o
-da | de + a
-em | in
-um | a
-para | for
- | é from SER
-com | with
-não | not, no
-uma | a
-os | the; them
-no | em + o
-se | himself etc
-na | em + a
-por | for
-mais | more
-as | the; them
-dos | de + os
-como | as, like
-mas | but
- | foi from SER
-ao | a + o
-ele | he
-das | de + as
- | tem from TER
-à | a + a
-seu | his
-sua | her
-ou | or
- | ser from SER
-quando | when
-muito | much
- | há from HAV
-nos | em + os; us
-já | already, now
- | está from EST
-eu | I
-também | also
-só | only, just
-pelo | per + o
-pela | per + a
-até | up to
-isso | that
-ela | he
-entre | between
- | era from SER
-depois | after
-sem | without
-mesmo | same
-aos | a + os
- | ter from TER
-seus | his
-quem | whom
-nas | em + as
-me | me
-esse | that
-eles | they
- | estão from EST
-você | you
- | tinha from TER
- | foram from SER
-essa | that
-num | em + um
-nem | nor
-suas | her
-meu | my
-às | a + as
-minha | my
- | têm from TER
-numa | em + uma
-pelos | per + os
-elas | they
- | havia from HAV
- | seja from SER
-qual | which
- | será from SER
-nós | we
- | tenho from TER
-lhe | to him, her
-deles | of them
-essas | those
-esses | those
-pelas | per + as
-este | this
- | fosse from SER
-dele | of him
-
- | other words. There are many contractions such as naquele = em+aquele,
- | mo = me+o, but they are rare.
- | Indefinite article plural forms are also rare.
-
-tu | thou
-te | thee
-vocês | you (plural)
-vos | you
-lhes | to them
-meus | my
-minhas
-teu | thy
-tua
-teus
-tuas
-nosso | our
-nossa
-nossos
-nossas
-
-dela | of her
-delas | of them
-
-esta | this
-estes | these
-estas | these
-aquele | that
-aquela | that
-aqueles | those
-aquelas | those
-isto | this
-aquilo | that
-
- | forms of estar, to be (not including the infinitive):
-estou
-está
-estamos
-estão
-estive
-esteve
-estivemos
-estiveram
-estava
-estávamos
-estavam
-estivera
-estivéramos
-esteja
-estejamos
-estejam
-estivesse
-estivéssemos
-estivessem
-estiver
-estivermos
-estiverem
-
- | forms of haver, to have (not including the infinitive):
-hei
-há
-havemos
-hão
-houve
-houvemos
-houveram
-houvera
-houvéramos
-haja
-hajamos
-hajam
-houvesse
-houvéssemos
-houvessem
-houver
-houvermos
-houverem
-houverei
-houverá
-houveremos
-houverão
-houveria
-houveríamos
-houveriam
-
- | forms of ser, to be (not including the infinitive):
-sou
-somos
-são
-era
-éramos
-eram
-fui
-foi
-fomos
-foram
-fora
-fôramos
-seja
-sejamos
-sejam
-fosse
-fôssemos
-fossem
-for
-formos
-forem
-serei
-será
-seremos
-serão
-seria
-seríamos
-seriam
-
- | forms of ter, to have (not including the infinitive):
-tenho
-tem
-temos
-tém
-tinha
-tínhamos
-tinham
-tive
-teve
-tivemos
-tiveram
-tivera
-tivéramos
-tenha
-tenhamos
-tenham
-tivesse
-tivéssemos
-tivessem
-tiver
-tivermos
-tiverem
-terei
-terá
-teremos
-terão
-teria
-teríamos
-teriam
+++ /dev/null
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-acea
-aceasta
-această
-aceea
-acei
-aceia
-acel
-acela
-acele
-acelea
-acest
-acesta
-aceste
-acestea
-aceşti
-aceştia
-acolo
-acum
-ai
-aia
-aibă
-aici
-al
-ăla
-ale
-alea
-ălea
-altceva
-altcineva
-am
-ar
-are
-aş
-aşadar
-asemenea
-asta
-ăsta
-astăzi
-astea
-ăstea
-ăştia
-asupra
-aţi
-au
-avea
-avem
-aveţi
-azi
-bine
-bucur
-bună
-ca
-că
-căci
-când
-care
-cărei
-căror
-cărui
-cât
-câte
-câţi
-către
-câtva
-ce
-cel
-ceva
-chiar
-cînd
-cine
-cineva
-cît
-cîte
-cîţi
-cîtva
-contra
-cu
-cum
-cumva
-curând
-curînd
-da
-dă
-dacă
-dar
-datorită
-de
-deci
-deja
-deoarece
-departe
-deşi
-din
-dinaintea
-dintr
-dintre
-drept
-după
-ea
-ei
-el
-ele
-eram
-este
-eşti
-eu
-face
-fără
-fi
-fie
-fiecare
-fii
-fim
-fiţi
-iar
-ieri
-îi
-îl
-îmi
-împotriva
-în
-înainte
-înaintea
-încât
-încît
-încotro
-între
-întrucât
-întrucît
-îţi
-la
-lângă
-le
-li
-lîngă
-lor
-lui
-mă
-mâine
-mea
-mei
-mele
-mereu
-meu
-mi
-mine
-mult
-multă
-mulţi
-ne
-nicăieri
-nici
-nimeni
-nişte
-noastră
-noastre
-noi
-noştri
-nostru
-nu
-ori
-oricând
-oricare
-oricât
-orice
-oricînd
-oricine
-oricît
-oricum
-oriunde
-până
-pe
-pentru
-peste
-pînă
-poate
-pot
-prea
-prima
-primul
-prin
-printr
-sa
-să
-săi
-sale
-sau
-său
-se
-şi
-sînt
-sîntem
-sînteţi
-spre
-sub
-sunt
-suntem
-sunteţi
-ta
-tăi
-tale
-tău
-te
-ţi
-ţie
-tine
-toată
-toate
-tot
-toţi
-totuşi
-tu
-un
-una
-unde
-undeva
-unei
-unele
-uneori
-unor
-vă
-vi
-voastră
-voastre
-voi
-voştri
-vostru
-vouă
-vreo
-vreun
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | a russian stop word list. comments begin with vertical bar. each stop
- | word is at the start of a line.
-
- | this is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | letter `ё' is translated to `е'.
-
-и | and
-в | in/into
-во | alternative form
-не | not
-что | what/that
-он | he
-на | on/onto
-я | i
-с | from
-со | alternative form
-как | how
-а | milder form of `no' (but)
-то | conjunction and form of `that'
-все | all
-она | she
-так | so, thus
-его | him
-но | but
-да | yes/and
-ты | thou
-к | towards, by
-у | around, chez
-же | intensifier particle
-вы | you
-за | beyond, behind
-бы | conditional/subj. particle
-по | up to, along
-только | only
-ее | her
-мне | to me
-было | it was
-вот | here is/are, particle
-от | away from
-меня | me
-еще | still, yet, more
-нет | no, there isnt/arent
-о | about
-из | out of
-ему | to him
-теперь | now
-когда | when
-даже | even
-ну | so, well
-вдруг | suddenly
-ли | interrogative particle
-если | if
-уже | already, but homonym of `narrower'
-или | or
-ни | neither
-быть | to be
-был | he was
-него | prepositional form of его
-до | up to
-вас | you accusative
-нибудь | indef. suffix preceded by hyphen
-опять | again
-уж | already, but homonym of `adder'
-вам | to you
-сказал | he said
-ведь | particle `after all'
-там | there
-потом | then
-себя | oneself
-ничего | nothing
-ей | to her
-может | usually with `быть' as `maybe'
-они | they
-тут | here
-где | where
-есть | there is/are
-надо | got to, must
-ней | prepositional form of ей
-для | for
-мы | we
-тебя | thee
-их | them, their
-чем | than
-была | she was
-сам | self
-чтоб | in order to
-без | without
-будто | as if
-человек | man, person, one
-чего | genitive form of `what'
-раз | once
-тоже | also
-себе | to oneself
-под | beneath
-жизнь | life
-будет | will be
-ж | short form of intensifer particle `же'
-тогда | then
-кто | who
-этот | this
-говорил | was saying
-того | genitive form of `that'
-потому | for that reason
-этого | genitive form of `this'
-какой | which
-совсем | altogether
-ним | prepositional form of `его', `они'
-здесь | here
-этом | prepositional form of `этот'
-один | one
-почти | almost
-мой | my
-тем | instrumental/dative plural of `тот', `то'
-чтобы | full form of `in order that'
-нее | her (acc.)
-кажется | it seems
-сейчас | now
-были | they were
-куда | where to
-зачем | why
-сказать | to say
-всех | all (acc., gen. preposn. plural)
-никогда | never
-сегодня | today
-можно | possible, one can
-при | by
-наконец | finally
-два | two
-об | alternative form of `о', about
-другой | another
-хоть | even
-после | after
-над | above
-больше | more
-тот | that one (masc.)
-через | across, in
-эти | these
-нас | us
-про | about
-всего | in all, only, of all
-них | prepositional form of `они' (they)
-какая | which, feminine
-много | lots
-разве | interrogative particle
-сказала | she said
-три | three
-эту | this, acc. fem. sing.
-моя | my, feminine
-впрочем | moreover, besides
-хорошо | good
-свою | ones own, acc. fem. sing.
-этой | oblique form of `эта', fem. `this'
-перед | in front of
-иногда | sometimes
-лучше | better
-чуть | a little
-том | preposn. form of `that one'
-нельзя | one must not
-такой | such a one
-им | to them
-более | more
-всегда | always
-конечно | of course
-всю | acc. fem. sing of `all'
-между | between
-
-
- | b: some paradigms
- |
- | personal pronouns
- |
- | я меня мне мной [мною]
- | ты тебя тебе тобой [тобою]
- | он его ему им [него, нему, ним]
- | она ее эи ею [нее, нэи, нею]
- | оно его ему им [него, нему, ним]
- |
- | мы нас нам нами
- | вы вас вам вами
- | они их им ими [них, ним, ними]
- |
- | себя себе собой [собою]
- |
- | demonstrative pronouns: этот (this), тот (that)
- |
- | этот эта это эти
- | этого эты это эти
- | этого этой этого этих
- | этому этой этому этим
- | этим этой этим [этою] этими
- | этом этой этом этих
- |
- | тот та то те
- | того ту то те
- | того той того тех
- | тому той тому тем
- | тем той тем [тою] теми
- | том той том тех
- |
- | determinative pronouns
- |
- | (a) весь (all)
- |
- | весь вся все все
- | всего всю все все
- | всего всей всего всех
- | всему всей всему всем
- | всем всей всем [всею] всеми
- | всем всей всем всех
- |
- | (b) сам (himself etc)
- |
- | сам сама само сами
- | самого саму само самих
- | самого самой самого самих
- | самому самой самому самим
- | самим самой самим [самою] самими
- | самом самой самом самих
- |
- | stems of verbs `to be', `to have', `to do' and modal
- |
- | быть бы буд быв есть суть
- | име
- | дел
- | мог мож мочь
- | уме
- | хоч хот
- | долж
- | можн
- | нужн
- | нельзя
-
+++ /dev/null
- | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Swedish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | Swedish stop words occasionally exhibit homonym clashes. For example
- | så = so, but also seed. These are indicated clearly below.
-
-och | and
-det | it, this/that
-att | to (with infinitive)
-i | in, at
-en | a
-jag | I
-hon | she
-som | who, that
-han | he
-på | on
-den | it, this/that
-med | with
-var | where, each
-sig | him(self) etc
-för | for
-så | so (also: seed)
-till | to
-är | is
-men | but
-ett | a
-om | if; around, about
-hade | had
-de | they, these/those
-av | of
-icke | not, no
-mig | me
-du | you
-henne | her
-då | then, when
-sin | his
-nu | now
-har | have
-inte | inte någon = no one
-hans | his
-honom | him
-skulle | 'sake'
-hennes | her
-där | there
-min | my
-man | one (pronoun)
-ej | nor
-vid | at, by, on (also: vast)
-kunde | could
-något | some etc
-från | from, off
-ut | out
-när | when
-efter | after, behind
-upp | up
-vi | we
-dem | them
-vara | be
-vad | what
-över | over
-än | than
-dig | you
-kan | can
-sina | his
-här | here
-ha | have
-mot | towards
-alla | all
-under | under (also: wonder)
-någon | some etc
-eller | or (else)
-allt | all
-mycket | much
-sedan | since
-ju | why
-denna | this/that
-själv | myself, yourself etc
-detta | this/that
-åt | to
-utan | without
-varit | was
-hur | how
-ingen | no
-mitt | my
-ni | you
-bli | to be, become
-blev | from bli
-oss | us
-din | thy
-dessa | these/those
-några | some etc
-deras | their
-blir | from bli
-mina | my
-samma | (the) same
-vilken | who, that
-er | you, your
-sådan | such a
-vår | our
-blivit | from bli
-dess | its
-inom | within
-mellan | between
-sådant | such a
-varför | why
-varje | each
-vilka | who, that
-ditt | thy
-vem | who
-vilket | who, that
-sitta | his
-sådana | such a
-vart | each
-dina | thy
-vars | whose
-vårt | our
-våra | our
-ert | your
-era | your
-vilkas | whose
-
+++ /dev/null
-# Thai stopwords from:
-# "Opinion Detection in Thai Political News Columns
-# Based on Subjectivity Analysis"
-# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
-ไว้
-ไม่
-ไป
-ได้
-ให้
-ใน
-โดย
-แห่ง
-แล้ว
-และ
-แรก
-แบบ
-แต่
-เอง
-เห็น
-เลย
-เริ่ม
-เรา
-เมื่อ
-เพื่อ
-เพราะ
-เป็นการ
-เป็น
-เปิดเผย
-เปิด
-เนื่องจาก
-เดียวกัน
-เดียว
-เช่น
-เฉพาะ
-เคย
-เข้า
-เขา
-อีก
-อาจ
-อะไร
-ออก
-อย่าง
-อยู่
-อยาก
-หาก
-หลาย
-หลังจาก
-หลัง
-หรือ
-หนึ่ง
-ส่วน
-ส่ง
-สุด
-สําหรับ
-ว่า
-วัน
-ลง
-ร่วม
-ราย
-รับ
-ระหว่าง
-รวม
-ยัง
-มี
-มาก
-มา
-พร้อม
-พบ
-ผ่าน
-ผล
-บาง
-น่า
-นี้
-นํา
-นั้น
-นัก
-นอกจาก
-ทุก
-ที่สุด
-ที่
-ทําให้
-ทํา
-ทาง
-ทั้งนี้
-ทั้ง
-ถ้า
-ถูก
-ถึง
-ต้อง
-ต่างๆ
-ต่าง
-ต่อ
-ตาม
-ตั้งแต่
-ตั้ง
-ด้าน
-ด้วย
-ดัง
-ซึ่ง
-ช่วง
-จึง
-จาก
-จัด
-จะ
-คือ
-ความ
-ครั้ง
-คง
-ขึ้น
-ของ
-ขอ
-ขณะ
-ก่อน
-ก็
-การ
-กับ
-กัน
-กว่า
-กล่าว
+++ /dev/null
-# Turkish stopwords from LUCENE-559
-# merged with the list from "Information Retrieval on Turkish Texts"
-# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
-acaba
-altmış
-altı
-ama
-ancak
-arada
-aslında
-ayrıca
-bana
-bazı
-belki
-ben
-benden
-beni
-benim
-beri
-beş
-bile
-bin
-bir
-birçok
-biri
-birkaç
-birkez
-birşey
-birşeyi
-biz
-bize
-bizden
-bizi
-bizim
-böyle
-böylece
-bu
-buna
-bunda
-bundan
-bunlar
-bunları
-bunların
-bunu
-bunun
-burada
-çok
-çünkü
-da
-daha
-dahi
-de
-defa
-değil
-diğer
-diye
-doksan
-dokuz
-dolayı
-dolayısıyla
-dört
-edecek
-eden
-ederek
-edilecek
-ediliyor
-edilmesi
-ediyor
-eğer
-elli
-en
-etmesi
-etti
-ettiği
-ettiğini
-gibi
-göre
-halen
-hangi
-hatta
-hem
-henüz
-hep
-hepsi
-her
-herhangi
-herkesin
-hiç
-hiçbir
-için
-iki
-ile
-ilgili
-ise
-işte
-itibaren
-itibariyle
-kadar
-karşın
-katrilyon
-kendi
-kendilerine
-kendini
-kendisi
-kendisine
-kendisini
-kez
-ki
-kim
-kimden
-kime
-kimi
-kimse
-kırk
-milyar
-milyon
-mu
-mü
-mı
-nasıl
-ne
-neden
-nedenle
-nerde
-nerede
-nereye
-niye
-niçin
-o
-olan
-olarak
-oldu
-olduğu
-olduğunu
-olduklarını
-olmadı
-olmadığı
-olmak
-olması
-olmayan
-olmaz
-olsa
-olsun
-olup
-olur
-olursa
-oluyor
-on
-ona
-ondan
-onlar
-onlardan
-onları
-onların
-onu
-onun
-otuz
-oysa
-öyle
-pek
-rağmen
-sadece
-sanki
-sekiz
-seksen
-sen
-senden
-seni
-senin
-siz
-sizden
-sizi
-sizin
-şey
-şeyden
-şeyi
-şeyler
-şöyle
-şu
-şuna
-şunda
-şundan
-şunları
-şunu
-tarafından
-trilyon
-tüm
-üç
-üzere
-var
-vardı
-ve
-veya
-ya
-yani
-yapacak
-yapılan
-yapılması
-yapıyor
-yapmak
-yaptı
-yaptığı
-yaptığını
-yaptıkları
-yedi
-yerine
-yetmiş
-yine
-yirmi
-yoksa
-yüz
-zaten
+++ /dev/null
-#
-# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
-#
-# Add entries to this file in order to override the statistical model in terms
-# of segmentation, readings and part-of-speech tags. Notice that entries do
-# not have weights since they are always used when found. This is by-design
-# in order to maximize ease-of-use.
-#
-# Entries are defined using the following CSV format:
-# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
-#
-# Notice that a single half-width space separates tokens and readings, and
-# that the number tokens and readings must match exactly.
-#
-# Also notice that multiple entries with the same <text> is undefined.
-#
-# Whitespace only lines are ignored. Comments are not allowed on entry lines.
-#
-
-# Custom segmentation for kanji compounds
-日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
-関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
-
-# Custom segmentation for compound katakana
-トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
-ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
-
-# Custom reading for former sumo wrestler
-朝青龍,朝青龍,アサショウリュウ,カスタム人名
+++ /dev/null
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# This map converts alphabetic, numeric, and symbolic Unicode characters
-# which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
-# block) into their ASCII equivalents, if one exists.
-#
-# Characters from the following Unicode blocks are converted; however, only
-# those characters with reasonable ASCII alternatives are converted:
-#
-# - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf
-# - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf
-# - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf
-# - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf
-# - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf
-# - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf
-# - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf
-# - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf
-# - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf
-# - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf
-# - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf
-# - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf
-# - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf
-# - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf
-# - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf
-# - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf
-#
-# See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
-#
-# The set of character conversions supported by this map is a superset of
-# those supported by the map represented by mapping-ISOLatin1Accent.txt.
-#
-# See the bottom of this file for the Perl script used to generate the contents
-# of this file (without this header) from ASCIIFoldingFilter.java.
-
-
-# Syntax:
-# "source" => "target"
-# "source".length() > 0 (source cannot be empty.)
-# "target".length() >= 0 (target can be empty.)
-
-
-# À [LATIN CAPITAL LETTER A WITH GRAVE]
-"\u00C0" => "A"
-
-# Á [LATIN CAPITAL LETTER A WITH ACUTE]
-"\u00C1" => "A"
-
-# Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
-"\u00C2" => "A"
-
-# Ã [LATIN CAPITAL LETTER A WITH TILDE]
-"\u00C3" => "A"
-
-# Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
-"\u00C4" => "A"
-
-# Å [LATIN CAPITAL LETTER A WITH RING ABOVE]
-"\u00C5" => "A"
-
-# Ā [LATIN CAPITAL LETTER A WITH MACRON]
-"\u0100" => "A"
-
-# Ă [LATIN CAPITAL LETTER A WITH BREVE]
-"\u0102" => "A"
-
-# Ą [LATIN CAPITAL LETTER A WITH OGONEK]
-"\u0104" => "A"
-
-# Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
-"\u018F" => "A"
-
-# Ǎ [LATIN CAPITAL LETTER A WITH CARON]
-"\u01CD" => "A"
-
-# Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
-"\u01DE" => "A"
-
-# Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
-"\u01E0" => "A"
-
-# Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
-"\u01FA" => "A"
-
-# Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
-"\u0200" => "A"
-
-# Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
-"\u0202" => "A"
-
-# Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
-"\u0226" => "A"
-
-# Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
-"\u023A" => "A"
-
-# ᴀ [LATIN LETTER SMALL CAPITAL A]
-"\u1D00" => "A"
-
-# Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
-"\u1E00" => "A"
-
-# Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW]
-"\u1EA0" => "A"
-
-# Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
-"\u1EA2" => "A"
-
-# Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
-"\u1EA4" => "A"
-
-# Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
-"\u1EA6" => "A"
-
-# Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1EA8" => "A"
-
-# Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
-"\u1EAA" => "A"
-
-# Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
-"\u1EAC" => "A"
-
-# Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
-"\u1EAE" => "A"
-
-# Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
-"\u1EB0" => "A"
-
-# Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
-"\u1EB2" => "A"
-
-# Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
-"\u1EB4" => "A"
-
-# Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
-"\u1EB6" => "A"
-
-# Ⓐ [CIRCLED LATIN CAPITAL LETTER A]
-"\u24B6" => "A"
-
-# A [FULLWIDTH LATIN CAPITAL LETTER A]
-"\uFF21" => "A"
-
-# à [LATIN SMALL LETTER A WITH GRAVE]
-"\u00E0" => "a"
-
-# á [LATIN SMALL LETTER A WITH ACUTE]
-"\u00E1" => "a"
-
-# â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
-"\u00E2" => "a"
-
-# ã [LATIN SMALL LETTER A WITH TILDE]
-"\u00E3" => "a"
-
-# ä [LATIN SMALL LETTER A WITH DIAERESIS]
-"\u00E4" => "a"
-
-# å [LATIN SMALL LETTER A WITH RING ABOVE]
-"\u00E5" => "a"
-
-# ā [LATIN SMALL LETTER A WITH MACRON]
-"\u0101" => "a"
-
-# ă [LATIN SMALL LETTER A WITH BREVE]
-"\u0103" => "a"
-
-# ą [LATIN SMALL LETTER A WITH OGONEK]
-"\u0105" => "a"
-
-# ǎ [LATIN SMALL LETTER A WITH CARON]
-"\u01CE" => "a"
-
-# ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
-"\u01DF" => "a"
-
-# ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
-"\u01E1" => "a"
-
-# ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
-"\u01FB" => "a"
-
-# ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
-"\u0201" => "a"
-
-# ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
-"\u0203" => "a"
-
-# ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
-"\u0227" => "a"
-
-# ɐ [LATIN SMALL LETTER TURNED A]
-"\u0250" => "a"
-
-# ə [LATIN SMALL LETTER SCHWA]
-"\u0259" => "a"
-
-# ɚ [LATIN SMALL LETTER SCHWA WITH HOOK]
-"\u025A" => "a"
-
-# ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
-"\u1D8F" => "a"
-
-# ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
-"\u1D95" => "a"
-
-# ạ [LATIN SMALL LETTER A WITH RING BELOW]
-"\u1E01" => "a"
-
-# ả [LATIN SMALL LETTER A WITH RIGHT HALF RING]
-"\u1E9A" => "a"
-
-# ạ [LATIN SMALL LETTER A WITH DOT BELOW]
-"\u1EA1" => "a"
-
-# ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
-"\u1EA3" => "a"
-
-# ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
-"\u1EA5" => "a"
-
-# ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
-"\u1EA7" => "a"
-
-# ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1EA9" => "a"
-
-# ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
-"\u1EAB" => "a"
-
-# ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
-"\u1EAD" => "a"
-
-# ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
-"\u1EAF" => "a"
-
-# ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
-"\u1EB1" => "a"
-
-# ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
-"\u1EB3" => "a"
-
-# ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
-"\u1EB5" => "a"
-
-# ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
-"\u1EB7" => "a"
-
-# ₐ [LATIN SUBSCRIPT SMALL LETTER A]
-"\u2090" => "a"
-
-# ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA]
-"\u2094" => "a"
-
-# ⓐ [CIRCLED LATIN SMALL LETTER A]
-"\u24D0" => "a"
-
-# ⱥ [LATIN SMALL LETTER A WITH STROKE]
-"\u2C65" => "a"
-
-# Ɐ [LATIN CAPITAL LETTER TURNED A]
-"\u2C6F" => "a"
-
-# a [FULLWIDTH LATIN SMALL LETTER A]
-"\uFF41" => "a"
-
-# Ꜳ [LATIN CAPITAL LETTER AA]
-"\uA732" => "AA"
-
-# Æ [LATIN CAPITAL LETTER AE]
-"\u00C6" => "AE"
-
-# Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
-"\u01E2" => "AE"
-
-# Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
-"\u01FC" => "AE"
-
-# ᴁ [LATIN LETTER SMALL CAPITAL AE]
-"\u1D01" => "AE"
-
-# Ꜵ [LATIN CAPITAL LETTER AO]
-"\uA734" => "AO"
-
-# Ꜷ [LATIN CAPITAL LETTER AU]
-"\uA736" => "AU"
-
-# Ꜹ [LATIN CAPITAL LETTER AV]
-"\uA738" => "AV"
-
-# Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
-"\uA73A" => "AV"
-
-# Ꜽ [LATIN CAPITAL LETTER AY]
-"\uA73C" => "AY"
-
-# ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
-"\u249C" => "(a)"
-
-# ꜳ [LATIN SMALL LETTER AA]
-"\uA733" => "aa"
-
-# æ [LATIN SMALL LETTER AE]
-"\u00E6" => "ae"
-
-# ǣ [LATIN SMALL LETTER AE WITH MACRON]
-"\u01E3" => "ae"
-
-# ǽ [LATIN SMALL LETTER AE WITH ACUTE]
-"\u01FD" => "ae"
-
-# ᴂ [LATIN SMALL LETTER TURNED AE]
-"\u1D02" => "ae"
-
-# ꜵ [LATIN SMALL LETTER AO]
-"\uA735" => "ao"
-
-# ꜷ [LATIN SMALL LETTER AU]
-"\uA737" => "au"
-
-# ꜹ [LATIN SMALL LETTER AV]
-"\uA739" => "av"
-
-# ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
-"\uA73B" => "av"
-
-# ꜽ [LATIN SMALL LETTER AY]
-"\uA73D" => "ay"
-
-# Ɓ [LATIN CAPITAL LETTER B WITH HOOK]
-"\u0181" => "B"
-
-# Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR]
-"\u0182" => "B"
-
-# Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
-"\u0243" => "B"
-
-# ʙ [LATIN LETTER SMALL CAPITAL B]
-"\u0299" => "B"
-
-# ᴃ [LATIN LETTER SMALL CAPITAL BARRED B]
-"\u1D03" => "B"
-
-# Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
-"\u1E02" => "B"
-
-# Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
-"\u1E04" => "B"
-
-# Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW]
-"\u1E06" => "B"
-
-# Ⓑ [CIRCLED LATIN CAPITAL LETTER B]
-"\u24B7" => "B"
-
-# B [FULLWIDTH LATIN CAPITAL LETTER B]
-"\uFF22" => "B"
-
-# ƀ [LATIN SMALL LETTER B WITH STROKE]
-"\u0180" => "b"
-
-# ƃ [LATIN SMALL LETTER B WITH TOPBAR]
-"\u0183" => "b"
-
-# ɓ [LATIN SMALL LETTER B WITH HOOK]
-"\u0253" => "b"
-
-# ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
-"\u1D6C" => "b"
-
-# ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK]
-"\u1D80" => "b"
-
-# ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
-"\u1E03" => "b"
-
-# ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
-"\u1E05" => "b"
-
-# ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
-"\u1E07" => "b"
-
-# ⓑ [CIRCLED LATIN SMALL LETTER B]
-"\u24D1" => "b"
-
-# b [FULLWIDTH LATIN SMALL LETTER B]
-"\uFF42" => "b"
-
-# ⒝ [PARENTHESIZED LATIN SMALL LETTER B]
-"\u249D" => "(b)"
-
-# Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
-"\u00C7" => "C"
-
-# Ć [LATIN CAPITAL LETTER C WITH ACUTE]
-"\u0106" => "C"
-
-# Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
-"\u0108" => "C"
-
-# Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE]
-"\u010A" => "C"
-
-# Č [LATIN CAPITAL LETTER C WITH CARON]
-"\u010C" => "C"
-
-# Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
-"\u0187" => "C"
-
-# Ȼ [LATIN CAPITAL LETTER C WITH STROKE]
-"\u023B" => "C"
-
-# ʗ [LATIN LETTER STRETCHED C]
-"\u0297" => "C"
-
-# ᴄ [LATIN LETTER SMALL CAPITAL C]
-"\u1D04" => "C"
-
-# Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
-"\u1E08" => "C"
-
-# Ⓒ [CIRCLED LATIN CAPITAL LETTER C]
-"\u24B8" => "C"
-
-# C [FULLWIDTH LATIN CAPITAL LETTER C]
-"\uFF23" => "C"
-
-# ç [LATIN SMALL LETTER C WITH CEDILLA]
-"\u00E7" => "c"
-
-# ć [LATIN SMALL LETTER C WITH ACUTE]
-"\u0107" => "c"
-
-# ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
-"\u0109" => "c"
-
-# ċ [LATIN SMALL LETTER C WITH DOT ABOVE]
-"\u010B" => "c"
-
-# č [LATIN SMALL LETTER C WITH CARON]
-"\u010D" => "c"
-
-# ƈ [LATIN SMALL LETTER C WITH HOOK]
-"\u0188" => "c"
-
-# ȼ [LATIN SMALL LETTER C WITH STROKE]
-"\u023C" => "c"
-
-# ɕ [LATIN SMALL LETTER C WITH CURL]
-"\u0255" => "c"
-
-# ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
-"\u1E09" => "c"
-
-# ↄ [LATIN SMALL LETTER REVERSED C]
-"\u2184" => "c"
-
-# ⓒ [CIRCLED LATIN SMALL LETTER C]
-"\u24D2" => "c"
-
-# Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
-"\uA73E" => "c"
-
-# ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
-"\uA73F" => "c"
-
-# c [FULLWIDTH LATIN SMALL LETTER C]
-"\uFF43" => "c"
-
-# ⒞ [PARENTHESIZED LATIN SMALL LETTER C]
-"\u249E" => "(c)"
-
-# Ð [LATIN CAPITAL LETTER ETH]
-"\u00D0" => "D"
-
-# Ď [LATIN CAPITAL LETTER D WITH CARON]
-"\u010E" => "D"
-
-# Đ [LATIN CAPITAL LETTER D WITH STROKE]
-"\u0110" => "D"
-
-# Ɖ [LATIN CAPITAL LETTER AFRICAN D]
-"\u0189" => "D"
-
-# Ɗ [LATIN CAPITAL LETTER D WITH HOOK]
-"\u018A" => "D"
-
-# Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR]
-"\u018B" => "D"
-
-# ᴅ [LATIN LETTER SMALL CAPITAL D]
-"\u1D05" => "D"
-
-# ᴆ [LATIN LETTER SMALL CAPITAL ETH]
-"\u1D06" => "D"
-
-# Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE]
-"\u1E0A" => "D"
-
-# Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
-"\u1E0C" => "D"
-
-# Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
-"\u1E0E" => "D"
-
-# Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA]
-"\u1E10" => "D"
-
-# Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
-"\u1E12" => "D"
-
-# Ⓓ [CIRCLED LATIN CAPITAL LETTER D]
-"\u24B9" => "D"
-
-# Ꝺ [LATIN CAPITAL LETTER INSULAR D]
-"\uA779" => "D"
-
-# D [FULLWIDTH LATIN CAPITAL LETTER D]
-"\uFF24" => "D"
-
-# ð [LATIN SMALL LETTER ETH]
-"\u00F0" => "d"
-
-# ď [LATIN SMALL LETTER D WITH CARON]
-"\u010F" => "d"
-
-# đ [LATIN SMALL LETTER D WITH STROKE]
-"\u0111" => "d"
-
-# ƌ [LATIN SMALL LETTER D WITH TOPBAR]
-"\u018C" => "d"
-
-# ȡ [LATIN SMALL LETTER D WITH CURL]
-"\u0221" => "d"
-
-# ɖ [LATIN SMALL LETTER D WITH TAIL]
-"\u0256" => "d"
-
-# ɗ [LATIN SMALL LETTER D WITH HOOK]
-"\u0257" => "d"
-
-# ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
-"\u1D6D" => "d"
-
-# ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK]
-"\u1D81" => "d"
-
-# ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
-"\u1D91" => "d"
-
-# ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
-"\u1E0B" => "d"
-
-# ḍ [LATIN SMALL LETTER D WITH DOT BELOW]
-"\u1E0D" => "d"
-
-# ḏ [LATIN SMALL LETTER D WITH LINE BELOW]
-"\u1E0F" => "d"
-
-# ḑ [LATIN SMALL LETTER D WITH CEDILLA]
-"\u1E11" => "d"
-
-# ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
-"\u1E13" => "d"
-
-# ⓓ [CIRCLED LATIN SMALL LETTER D]
-"\u24D3" => "d"
-
-# ꝺ [LATIN SMALL LETTER INSULAR D]
-"\uA77A" => "d"
-
-# d [FULLWIDTH LATIN SMALL LETTER D]
-"\uFF44" => "d"
-
-# DŽ [LATIN CAPITAL LETTER DZ WITH CARON]
-"\u01C4" => "DZ"
-
-# DZ [LATIN CAPITAL LETTER DZ]
-"\u01F1" => "DZ"
-
-# Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
-"\u01C5" => "Dz"
-
-# Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
-"\u01F2" => "Dz"
-
-# ⒟ [PARENTHESIZED LATIN SMALL LETTER D]
-"\u249F" => "(d)"
-
-# ȸ [LATIN SMALL LETTER DB DIGRAPH]
-"\u0238" => "db"
-
-# dž [LATIN SMALL LETTER DZ WITH CARON]
-"\u01C6" => "dz"
-
-# dz [LATIN SMALL LETTER DZ]
-"\u01F3" => "dz"
-
-# ʣ [LATIN SMALL LETTER DZ DIGRAPH]
-"\u02A3" => "dz"
-
-# ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
-"\u02A5" => "dz"
-
-# È [LATIN CAPITAL LETTER E WITH GRAVE]
-"\u00C8" => "E"
-
-# É [LATIN CAPITAL LETTER E WITH ACUTE]
-"\u00C9" => "E"
-
-# Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
-"\u00CA" => "E"
-
-# Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
-"\u00CB" => "E"
-
-# Ē [LATIN CAPITAL LETTER E WITH MACRON]
-"\u0112" => "E"
-
-# Ĕ [LATIN CAPITAL LETTER E WITH BREVE]
-"\u0114" => "E"
-
-# Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE]
-"\u0116" => "E"
-
-# Ę [LATIN CAPITAL LETTER E WITH OGONEK]
-"\u0118" => "E"
-
-# Ě [LATIN CAPITAL LETTER E WITH CARON]
-"\u011A" => "E"
-
-# Ǝ [LATIN CAPITAL LETTER REVERSED E]
-"\u018E" => "E"
-
-# Ɛ [LATIN CAPITAL LETTER OPEN E]
-"\u0190" => "E"
-
-# Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
-"\u0204" => "E"
-
-# Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
-"\u0206" => "E"
-
-# Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
-"\u0228" => "E"
-
-# Ɇ [LATIN CAPITAL LETTER E WITH STROKE]
-"\u0246" => "E"
-
-# ᴇ [LATIN LETTER SMALL CAPITAL E]
-"\u1D07" => "E"
-
-# Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
-"\u1E14" => "E"
-
-# Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
-"\u1E16" => "E"
-
-# Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
-"\u1E18" => "E"
-
-# Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
-"\u1E1A" => "E"
-
-# Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
-"\u1E1C" => "E"
-
-# Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
-"\u1EB8" => "E"
-
-# Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
-"\u1EBA" => "E"
-
-# Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
-"\u1EBC" => "E"
-
-# Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
-"\u1EBE" => "E"
-
-# Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
-"\u1EC0" => "E"
-
-# Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1EC2" => "E"
-
-# Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
-"\u1EC4" => "E"
-
-# Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
-"\u1EC6" => "E"
-
-# Ⓔ [CIRCLED LATIN CAPITAL LETTER E]
-"\u24BA" => "E"
-
-# ⱻ [LATIN LETTER SMALL CAPITAL TURNED E]
-"\u2C7B" => "E"
-
-# E [FULLWIDTH LATIN CAPITAL LETTER E]
-"\uFF25" => "E"
-
-# è [LATIN SMALL LETTER E WITH GRAVE]
-"\u00E8" => "e"
-
-# é [LATIN SMALL LETTER E WITH ACUTE]
-"\u00E9" => "e"
-
-# ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
-"\u00EA" => "e"
-
-# ë [LATIN SMALL LETTER E WITH DIAERESIS]
-"\u00EB" => "e"
-
-# ē [LATIN SMALL LETTER E WITH MACRON]
-"\u0113" => "e"
-
-# ĕ [LATIN SMALL LETTER E WITH BREVE]
-"\u0115" => "e"
-
-# ė [LATIN SMALL LETTER E WITH DOT ABOVE]
-"\u0117" => "e"
-
-# ę [LATIN SMALL LETTER E WITH OGONEK]
-"\u0119" => "e"
-
-# ě [LATIN SMALL LETTER E WITH CARON]
-"\u011B" => "e"
-
-# ǝ [LATIN SMALL LETTER TURNED E]
-"\u01DD" => "e"
-
-# ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
-"\u0205" => "e"
-
-# ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
-"\u0207" => "e"
-
-# ȩ [LATIN SMALL LETTER E WITH CEDILLA]
-"\u0229" => "e"
-
-# ɇ [LATIN SMALL LETTER E WITH STROKE]
-"\u0247" => "e"
-
-# ɘ [LATIN SMALL LETTER REVERSED E]
-"\u0258" => "e"
-
-# ɛ [LATIN SMALL LETTER OPEN E]
-"\u025B" => "e"
-
-# ɜ [LATIN SMALL LETTER REVERSED OPEN E]
-"\u025C" => "e"
-
-# ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
-"\u025D" => "e"
-
-# ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
-"\u025E" => "e"
-
-# ʚ [LATIN SMALL LETTER CLOSED OPEN E]
-"\u029A" => "e"
-
-# ᴈ [LATIN SMALL LETTER TURNED OPEN E]
-"\u1D08" => "e"
-
-# ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
-"\u1D92" => "e"
-
-# ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
-"\u1D93" => "e"
-
-# ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
-"\u1D94" => "e"
-
-# ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
-"\u1E15" => "e"
-
-# ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
-"\u1E17" => "e"
-
-# ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
-"\u1E19" => "e"
-
-# ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
-"\u1E1B" => "e"
-
-# ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
-"\u1E1D" => "e"
-
-# ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
-"\u1EB9" => "e"
-
-# ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
-"\u1EBB" => "e"
-
-# ẽ [LATIN SMALL LETTER E WITH TILDE]
-"\u1EBD" => "e"
-
-# ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
-"\u1EBF" => "e"
-
-# ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
-"\u1EC1" => "e"
-
-# ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1EC3" => "e"
-
-# ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
-"\u1EC5" => "e"
-
-# ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
-"\u1EC7" => "e"
-
-# ₑ [LATIN SUBSCRIPT SMALL LETTER E]
-"\u2091" => "e"
-
-# ⓔ [CIRCLED LATIN SMALL LETTER E]
-"\u24D4" => "e"
-
-# ⱸ [LATIN SMALL LETTER E WITH NOTCH]
-"\u2C78" => "e"
-
-# e [FULLWIDTH LATIN SMALL LETTER E]
-"\uFF45" => "e"
-
-# ⒠ [PARENTHESIZED LATIN SMALL LETTER E]
-"\u24A0" => "(e)"
-
-# Ƒ [LATIN CAPITAL LETTER F WITH HOOK]
-"\u0191" => "F"
-
-# Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
-"\u1E1E" => "F"
-
-# Ⓕ [CIRCLED LATIN CAPITAL LETTER F]
-"\u24BB" => "F"
-
-# ꜰ [LATIN LETTER SMALL CAPITAL F]
-"\uA730" => "F"
-
-# Ꝼ [LATIN CAPITAL LETTER INSULAR F]
-"\uA77B" => "F"
-
-# ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
-"\uA7FB" => "F"
-
-# F [FULLWIDTH LATIN CAPITAL LETTER F]
-"\uFF26" => "F"
-
-# ƒ [LATIN SMALL LETTER F WITH HOOK]
-"\u0192" => "f"
-
-# ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE]
-"\u1D6E" => "f"
-
-# ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK]
-"\u1D82" => "f"
-
-# ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
-"\u1E1F" => "f"
-
-# ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
-"\u1E9B" => "f"
-
-# ⓕ [CIRCLED LATIN SMALL LETTER F]
-"\u24D5" => "f"
-
-# ꝼ [LATIN SMALL LETTER INSULAR F]
-"\uA77C" => "f"
-
-# f [FULLWIDTH LATIN SMALL LETTER F]
-"\uFF46" => "f"
-
-# ⒡ [PARENTHESIZED LATIN SMALL LETTER F]
-"\u24A1" => "(f)"
-
-# ff [LATIN SMALL LIGATURE FF]
-"\uFB00" => "ff"
-
-# ffi [LATIN SMALL LIGATURE FFI]
-"\uFB03" => "ffi"
-
-# ffl [LATIN SMALL LIGATURE FFL]
-"\uFB04" => "ffl"
-
-# fi [LATIN SMALL LIGATURE FI]
-"\uFB01" => "fi"
-
-# fl [LATIN SMALL LIGATURE FL]
-"\uFB02" => "fl"
-
-# Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
-"\u011C" => "G"
-
-# Ğ [LATIN CAPITAL LETTER G WITH BREVE]
-"\u011E" => "G"
-
-# Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE]
-"\u0120" => "G"
-
-# Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
-"\u0122" => "G"
-
-# Ɠ [LATIN CAPITAL LETTER G WITH HOOK]
-"\u0193" => "G"
-
-# Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
-"\u01E4" => "G"
-
-# ǥ [LATIN SMALL LETTER G WITH STROKE]
-"\u01E5" => "G"
-
-# Ǧ [LATIN CAPITAL LETTER G WITH CARON]
-"\u01E6" => "G"
-
-# ǧ [LATIN SMALL LETTER G WITH CARON]
-"\u01E7" => "G"
-
-# Ǵ [LATIN CAPITAL LETTER G WITH ACUTE]
-"\u01F4" => "G"
-
-# ɢ [LATIN LETTER SMALL CAPITAL G]
-"\u0262" => "G"
-
-# ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK]
-"\u029B" => "G"
-
-# Ḡ [LATIN CAPITAL LETTER G WITH MACRON]
-"\u1E20" => "G"
-
-# Ⓖ [CIRCLED LATIN CAPITAL LETTER G]
-"\u24BC" => "G"
-
-# Ᵹ [LATIN CAPITAL LETTER INSULAR G]
-"\uA77D" => "G"
-
-# Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G]
-"\uA77E" => "G"
-
-# G [FULLWIDTH LATIN CAPITAL LETTER G]
-"\uFF27" => "G"
-
-# ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX]
-"\u011D" => "g"
-
-# ğ [LATIN SMALL LETTER G WITH BREVE]
-"\u011F" => "g"
-
-# ġ [LATIN SMALL LETTER G WITH DOT ABOVE]
-"\u0121" => "g"
-
-# ģ [LATIN SMALL LETTER G WITH CEDILLA]
-"\u0123" => "g"
-
-# ǵ [LATIN SMALL LETTER G WITH ACUTE]
-"\u01F5" => "g"
-
-# ɠ [LATIN SMALL LETTER G WITH HOOK]
-"\u0260" => "g"
-
-# ɡ [LATIN SMALL LETTER SCRIPT G]
-"\u0261" => "g"
-
-# ᵷ [LATIN SMALL LETTER TURNED G]
-"\u1D77" => "g"
-
-# ᵹ [LATIN SMALL LETTER INSULAR G]
-"\u1D79" => "g"
-
-# ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
-"\u1D83" => "g"
-
-# ḡ [LATIN SMALL LETTER G WITH MACRON]
-"\u1E21" => "g"
-
-# ⓖ [CIRCLED LATIN SMALL LETTER G]
-"\u24D6" => "g"
-
-# ꝿ [LATIN SMALL LETTER TURNED INSULAR G]
-"\uA77F" => "g"
-
-# g [FULLWIDTH LATIN SMALL LETTER G]
-"\uFF47" => "g"
-
-# ⒢ [PARENTHESIZED LATIN SMALL LETTER G]
-"\u24A2" => "(g)"
-
-# Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
-"\u0124" => "H"
-
-# Ħ [LATIN CAPITAL LETTER H WITH STROKE]
-"\u0126" => "H"
-
-# Ȟ [LATIN CAPITAL LETTER H WITH CARON]
-"\u021E" => "H"
-
-# ʜ [LATIN LETTER SMALL CAPITAL H]
-"\u029C" => "H"
-
-# Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
-"\u1E22" => "H"
-
-# Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
-"\u1E24" => "H"
-
-# Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
-"\u1E26" => "H"
-
-# Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
-"\u1E28" => "H"
-
-# Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
-"\u1E2A" => "H"
-
-# Ⓗ [CIRCLED LATIN CAPITAL LETTER H]
-"\u24BD" => "H"
-
-# Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER]
-"\u2C67" => "H"
-
-# Ⱶ [LATIN CAPITAL LETTER HALF H]
-"\u2C75" => "H"
-
-# H [FULLWIDTH LATIN CAPITAL LETTER H]
-"\uFF28" => "H"
-
-# ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
-"\u0125" => "h"
-
-# ħ [LATIN SMALL LETTER H WITH STROKE]
-"\u0127" => "h"
-
-# ȟ [LATIN SMALL LETTER H WITH CARON]
-"\u021F" => "h"
-
-# ɥ [LATIN SMALL LETTER TURNED H]
-"\u0265" => "h"
-
-# ɦ [LATIN SMALL LETTER H WITH HOOK]
-"\u0266" => "h"
-
-# ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
-"\u02AE" => "h"
-
-# ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
-"\u02AF" => "h"
-
-# ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
-"\u1E23" => "h"
-
-# ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
-"\u1E25" => "h"
-
-# ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
-"\u1E27" => "h"
-
-# ḩ [LATIN SMALL LETTER H WITH CEDILLA]
-"\u1E29" => "h"
-
-# ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
-"\u1E2B" => "h"
-
-# ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
-"\u1E96" => "h"
-
-# ⓗ [CIRCLED LATIN SMALL LETTER H]
-"\u24D7" => "h"
-
-# ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
-"\u2C68" => "h"
-
-# ⱶ [LATIN SMALL LETTER HALF H]
-"\u2C76" => "h"
-
-# h [FULLWIDTH LATIN SMALL LETTER H]
-"\uFF48" => "h"
-
-# Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
-"\u01F6" => "HV"
-
-# ⒣ [PARENTHESIZED LATIN SMALL LETTER H]
-"\u24A3" => "(h)"
-
-# ƕ [LATIN SMALL LETTER HV]
-"\u0195" => "hv"
-
-# Ì [LATIN CAPITAL LETTER I WITH GRAVE]
-"\u00CC" => "I"
-
-# Í [LATIN CAPITAL LETTER I WITH ACUTE]
-"\u00CD" => "I"
-
-# Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
-"\u00CE" => "I"
-
-# Ï [LATIN CAPITAL LETTER I WITH DIAERESIS]
-"\u00CF" => "I"
-
-# Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
-"\u0128" => "I"
-
-# Ī [LATIN CAPITAL LETTER I WITH MACRON]
-"\u012A" => "I"
-
-# Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
-"\u012C" => "I"
-
-# Į [LATIN CAPITAL LETTER I WITH OGONEK]
-"\u012E" => "I"
-
-# İ [LATIN CAPITAL LETTER I WITH DOT ABOVE]
-"\u0130" => "I"
-
-# Ɩ [LATIN CAPITAL LETTER IOTA]
-"\u0196" => "I"
-
-# Ɨ [LATIN CAPITAL LETTER I WITH STROKE]
-"\u0197" => "I"
-
-# Ǐ [LATIN CAPITAL LETTER I WITH CARON]
-"\u01CF" => "I"
-
-# Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
-"\u0208" => "I"
-
-# Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
-"\u020A" => "I"
-
-# ɪ [LATIN LETTER SMALL CAPITAL I]
-"\u026A" => "I"
-
-# ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE]
-"\u1D7B" => "I"
-
-# Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
-"\u1E2C" => "I"
-
-# Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
-"\u1E2E" => "I"
-
-# Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
-"\u1EC8" => "I"
-
-# Ị [LATIN CAPITAL LETTER I WITH DOT BELOW]
-"\u1ECA" => "I"
-
-# Ⓘ [CIRCLED LATIN CAPITAL LETTER I]
-"\u24BE" => "I"
-
-# ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
-"\uA7FE" => "I"
-
-# I [FULLWIDTH LATIN CAPITAL LETTER I]
-"\uFF29" => "I"
-
-# ì [LATIN SMALL LETTER I WITH GRAVE]
-"\u00EC" => "i"
-
-# í [LATIN SMALL LETTER I WITH ACUTE]
-"\u00ED" => "i"
-
-# î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
-"\u00EE" => "i"
-
-# ï [LATIN SMALL LETTER I WITH DIAERESIS]
-"\u00EF" => "i"
-
-# ĩ [LATIN SMALL LETTER I WITH TILDE]
-"\u0129" => "i"
-
-# ī [LATIN SMALL LETTER I WITH MACRON]
-"\u012B" => "i"
-
-# ĭ [LATIN SMALL LETTER I WITH BREVE]
-"\u012D" => "i"
-
-# į [LATIN SMALL LETTER I WITH OGONEK]
-"\u012F" => "i"
-
-# ı [LATIN SMALL LETTER DOTLESS I]
-"\u0131" => "i"
-
-# ǐ [LATIN SMALL LETTER I WITH CARON]
-"\u01D0" => "i"
-
-# ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
-"\u0209" => "i"
-
-# ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE]
-"\u020B" => "i"
-
-# ɨ [LATIN SMALL LETTER I WITH STROKE]
-"\u0268" => "i"
-
-# ᴉ [LATIN SMALL LETTER TURNED I]
-"\u1D09" => "i"
-
-# ᵢ [LATIN SUBSCRIPT SMALL LETTER I]
-"\u1D62" => "i"
-
-# ᵼ [LATIN SMALL LETTER IOTA WITH STROKE]
-"\u1D7C" => "i"
-
-# ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
-"\u1D96" => "i"
-
-# ḭ [LATIN SMALL LETTER I WITH TILDE BELOW]
-"\u1E2D" => "i"
-
-# ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
-"\u1E2F" => "i"
-
-# ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
-"\u1EC9" => "i"
-
-# ị [LATIN SMALL LETTER I WITH DOT BELOW]
-"\u1ECB" => "i"
-
-# ⁱ [SUPERSCRIPT LATIN SMALL LETTER I]
-"\u2071" => "i"
-
-# ⓘ [CIRCLED LATIN SMALL LETTER I]
-"\u24D8" => "i"
-
-# i [FULLWIDTH LATIN SMALL LETTER I]
-"\uFF49" => "i"
-
-# IJ [LATIN CAPITAL LIGATURE IJ]
-"\u0132" => "IJ"
-
-# ⒤ [PARENTHESIZED LATIN SMALL LETTER I]
-"\u24A4" => "(i)"
-
-# ij [LATIN SMALL LIGATURE IJ]
-"\u0133" => "ij"
-
-# Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
-"\u0134" => "J"
-
-# Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
-"\u0248" => "J"
-
-# ᴊ [LATIN LETTER SMALL CAPITAL J]
-"\u1D0A" => "J"
-
-# Ⓙ [CIRCLED LATIN CAPITAL LETTER J]
-"\u24BF" => "J"
-
-# J [FULLWIDTH LATIN CAPITAL LETTER J]
-"\uFF2A" => "J"
-
-# ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
-"\u0135" => "j"
-
-# ǰ [LATIN SMALL LETTER J WITH CARON]
-"\u01F0" => "j"
-
-# ȷ [LATIN SMALL LETTER DOTLESS J]
-"\u0237" => "j"
-
-# ɉ [LATIN SMALL LETTER J WITH STROKE]
-"\u0249" => "j"
-
-# ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
-"\u025F" => "j"
-
-# ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
-"\u0284" => "j"
-
-# ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL]
-"\u029D" => "j"
-
-# ⓙ [CIRCLED LATIN SMALL LETTER J]
-"\u24D9" => "j"
-
-# ⱼ [LATIN SUBSCRIPT SMALL LETTER J]
-"\u2C7C" => "j"
-
-# j [FULLWIDTH LATIN SMALL LETTER J]
-"\uFF4A" => "j"
-
-# ⒥ [PARENTHESIZED LATIN SMALL LETTER J]
-"\u24A5" => "(j)"
-
-# Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
-"\u0136" => "K"
-
-# Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
-"\u0198" => "K"
-
-# Ǩ [LATIN CAPITAL LETTER K WITH CARON]
-"\u01E8" => "K"
-
-# ᴋ [LATIN LETTER SMALL CAPITAL K]
-"\u1D0B" => "K"
-
-# Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
-"\u1E30" => "K"
-
-# Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
-"\u1E32" => "K"
-
-# Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
-"\u1E34" => "K"
-
-# Ⓚ [CIRCLED LATIN CAPITAL LETTER K]
-"\u24C0" => "K"
-
-# Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
-"\u2C69" => "K"
-
-# Ꝁ [LATIN CAPITAL LETTER K WITH STROKE]
-"\uA740" => "K"
-
-# Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
-"\uA742" => "K"
-
-# Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
-"\uA744" => "K"
-
-# K [FULLWIDTH LATIN CAPITAL LETTER K]
-"\uFF2B" => "K"
-
-# ķ [LATIN SMALL LETTER K WITH CEDILLA]
-"\u0137" => "k"
-
-# ƙ [LATIN SMALL LETTER K WITH HOOK]
-"\u0199" => "k"
-
-# ǩ [LATIN SMALL LETTER K WITH CARON]
-"\u01E9" => "k"
-
-# ʞ [LATIN SMALL LETTER TURNED K]
-"\u029E" => "k"
-
-# ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK]
-"\u1D84" => "k"
-
-# ḱ [LATIN SMALL LETTER K WITH ACUTE]
-"\u1E31" => "k"
-
-# ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
-"\u1E33" => "k"
-
-# ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
-"\u1E35" => "k"
-
-# ⓚ [CIRCLED LATIN SMALL LETTER K]
-"\u24DA" => "k"
-
-# ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
-"\u2C6A" => "k"
-
-# ꝁ [LATIN SMALL LETTER K WITH STROKE]
-"\uA741" => "k"
-
-# ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
-"\uA743" => "k"
-
-# ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
-"\uA745" => "k"
-
-# k [FULLWIDTH LATIN SMALL LETTER K]
-"\uFF4B" => "k"
-
-# ⒦ [PARENTHESIZED LATIN SMALL LETTER K]
-"\u24A6" => "(k)"
-
-# Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
-"\u0139" => "L"
-
-# Ļ [LATIN CAPITAL LETTER L WITH CEDILLA]
-"\u013B" => "L"
-
-# Ľ [LATIN CAPITAL LETTER L WITH CARON]
-"\u013D" => "L"
-
-# Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
-"\u013F" => "L"
-
-# Ł [LATIN CAPITAL LETTER L WITH STROKE]
-"\u0141" => "L"
-
-# Ƚ [LATIN CAPITAL LETTER L WITH BAR]
-"\u023D" => "L"
-
-# ʟ [LATIN LETTER SMALL CAPITAL L]
-"\u029F" => "L"
-
-# ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
-"\u1D0C" => "L"
-
-# Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
-"\u1E36" => "L"
-
-# Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
-"\u1E38" => "L"
-
-# Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
-"\u1E3A" => "L"
-
-# Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
-"\u1E3C" => "L"
-
-# Ⓛ [CIRCLED LATIN CAPITAL LETTER L]
-"\u24C1" => "L"
-
-# Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
-"\u2C60" => "L"
-
-# Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
-"\u2C62" => "L"
-
-# Ꝇ [LATIN CAPITAL LETTER BROKEN L]
-"\uA746" => "L"
-
-# Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE]
-"\uA748" => "L"
-
-# Ꞁ [LATIN CAPITAL LETTER TURNED L]
-"\uA780" => "L"
-
-# L [FULLWIDTH LATIN CAPITAL LETTER L]
-"\uFF2C" => "L"
-
-# ĺ [LATIN SMALL LETTER L WITH ACUTE]
-"\u013A" => "l"
-
-# ļ [LATIN SMALL LETTER L WITH CEDILLA]
-"\u013C" => "l"
-
-# ľ [LATIN SMALL LETTER L WITH CARON]
-"\u013E" => "l"
-
-# ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
-"\u0140" => "l"
-
-# ł [LATIN SMALL LETTER L WITH STROKE]
-"\u0142" => "l"
-
-# ƚ [LATIN SMALL LETTER L WITH BAR]
-"\u019A" => "l"
-
-# ȴ [LATIN SMALL LETTER L WITH CURL]
-"\u0234" => "l"
-
-# ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE]
-"\u026B" => "l"
-
-# ɬ [LATIN SMALL LETTER L WITH BELT]
-"\u026C" => "l"
-
-# ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
-"\u026D" => "l"
-
-# ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK]
-"\u1D85" => "l"
-
-# ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
-"\u1E37" => "l"
-
-# ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
-"\u1E39" => "l"
-
-# ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
-"\u1E3B" => "l"
-
-# ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
-"\u1E3D" => "l"
-
-# ⓛ [CIRCLED LATIN SMALL LETTER L]
-"\u24DB" => "l"
-
-# ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
-"\u2C61" => "l"
-
-# ꝇ [LATIN SMALL LETTER BROKEN L]
-"\uA747" => "l"
-
-# ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE]
-"\uA749" => "l"
-
-# ꞁ [LATIN SMALL LETTER TURNED L]
-"\uA781" => "l"
-
-# l [FULLWIDTH LATIN SMALL LETTER L]
-"\uFF4C" => "l"
-
-# LJ [LATIN CAPITAL LETTER LJ]
-"\u01C7" => "LJ"
-
-# Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
-"\u1EFA" => "LL"
-
-# Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
-"\u01C8" => "Lj"
-
-# ⒧ [PARENTHESIZED LATIN SMALL LETTER L]
-"\u24A7" => "(l)"
-
-# lj [LATIN SMALL LETTER LJ]
-"\u01C9" => "lj"
-
-# ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL]
-"\u1EFB" => "ll"
-
-# ʪ [LATIN SMALL LETTER LS DIGRAPH]
-"\u02AA" => "ls"
-
-# ʫ [LATIN SMALL LETTER LZ DIGRAPH]
-"\u02AB" => "lz"
-
-# Ɯ [LATIN CAPITAL LETTER TURNED M]
-"\u019C" => "M"
-
-# ᴍ [LATIN LETTER SMALL CAPITAL M]
-"\u1D0D" => "M"
-
-# Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
-"\u1E3E" => "M"
-
-# Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
-"\u1E40" => "M"
-
-# Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
-"\u1E42" => "M"
-
-# Ⓜ [CIRCLED LATIN CAPITAL LETTER M]
-"\u24C2" => "M"
-
-# Ɱ [LATIN CAPITAL LETTER M WITH HOOK]
-"\u2C6E" => "M"
-
-# ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
-"\uA7FD" => "M"
-
-# ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
-"\uA7FF" => "M"
-
-# M [FULLWIDTH LATIN CAPITAL LETTER M]
-"\uFF2D" => "M"
-
-# ɯ [LATIN SMALL LETTER TURNED M]
-"\u026F" => "m"
-
-# ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG]
-"\u0270" => "m"
-
-# ɱ [LATIN SMALL LETTER M WITH HOOK]
-"\u0271" => "m"
-
-# ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
-"\u1D6F" => "m"
-
-# ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK]
-"\u1D86" => "m"
-
-# ḿ [LATIN SMALL LETTER M WITH ACUTE]
-"\u1E3F" => "m"
-
-# ṁ [LATIN SMALL LETTER M WITH DOT ABOVE]
-"\u1E41" => "m"
-
-# ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
-"\u1E43" => "m"
-
-# ⓜ [CIRCLED LATIN SMALL LETTER M]
-"\u24DC" => "m"
-
-# m [FULLWIDTH LATIN SMALL LETTER M]
-"\uFF4D" => "m"
-
-# ⒨ [PARENTHESIZED LATIN SMALL LETTER M]
-"\u24A8" => "(m)"
-
-# Ñ [LATIN CAPITAL LETTER N WITH TILDE]
-"\u00D1" => "N"
-
-# Ń [LATIN CAPITAL LETTER N WITH ACUTE]
-"\u0143" => "N"
-
-# Ņ [LATIN CAPITAL LETTER N WITH CEDILLA]
-"\u0145" => "N"
-
-# Ň [LATIN CAPITAL LETTER N WITH CARON]
-"\u0147" => "N"
-
-# Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
-"\u014A" => "N"
-
-# Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK]
-"\u019D" => "N"
-
-# Ǹ [LATIN CAPITAL LETTER N WITH GRAVE]
-"\u01F8" => "N"
-
-# Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
-"\u0220" => "N"
-
-# ɴ [LATIN LETTER SMALL CAPITAL N]
-"\u0274" => "N"
-
-# ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N]
-"\u1D0E" => "N"
-
-# Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE]
-"\u1E44" => "N"
-
-# Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW]
-"\u1E46" => "N"
-
-# Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW]
-"\u1E48" => "N"
-
-# Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
-"\u1E4A" => "N"
-
-# Ⓝ [CIRCLED LATIN CAPITAL LETTER N]
-"\u24C3" => "N"
-
-# N [FULLWIDTH LATIN CAPITAL LETTER N]
-"\uFF2E" => "N"
-
-# ñ [LATIN SMALL LETTER N WITH TILDE]
-"\u00F1" => "n"
-
-# ń [LATIN SMALL LETTER N WITH ACUTE]
-"\u0144" => "n"
-
-# ņ [LATIN SMALL LETTER N WITH CEDILLA]
-"\u0146" => "n"
-
-# ň [LATIN SMALL LETTER N WITH CARON]
-"\u0148" => "n"
-
-# ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
-"\u0149" => "n"
-
-# ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG]
-"\u014B" => "n"
-
-# ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
-"\u019E" => "n"
-
-# ǹ [LATIN SMALL LETTER N WITH GRAVE]
-"\u01F9" => "n"
-
-# ȵ [LATIN SMALL LETTER N WITH CURL]
-"\u0235" => "n"
-
-# ɲ [LATIN SMALL LETTER N WITH LEFT HOOK]
-"\u0272" => "n"
-
-# ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
-"\u0273" => "n"
-
-# ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE]
-"\u1D70" => "n"
-
-# ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK]
-"\u1D87" => "n"
-
-# ṅ [LATIN SMALL LETTER N WITH DOT ABOVE]
-"\u1E45" => "n"
-
-# ṇ [LATIN SMALL LETTER N WITH DOT BELOW]
-"\u1E47" => "n"
-
-# ṉ [LATIN SMALL LETTER N WITH LINE BELOW]
-"\u1E49" => "n"
-
-# ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
-"\u1E4B" => "n"
-
-# ⁿ [SUPERSCRIPT LATIN SMALL LETTER N]
-"\u207F" => "n"
-
-# ⓝ [CIRCLED LATIN SMALL LETTER N]
-"\u24DD" => "n"
-
-# n [FULLWIDTH LATIN SMALL LETTER N]
-"\uFF4E" => "n"
-
-# NJ [LATIN CAPITAL LETTER NJ]
-"\u01CA" => "NJ"
-
-# Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
-"\u01CB" => "Nj"
-
-# ⒩ [PARENTHESIZED LATIN SMALL LETTER N]
-"\u24A9" => "(n)"
-
-# nj [LATIN SMALL LETTER NJ]
-"\u01CC" => "nj"
-
-# Ò [LATIN CAPITAL LETTER O WITH GRAVE]
-"\u00D2" => "O"
-
-# Ó [LATIN CAPITAL LETTER O WITH ACUTE]
-"\u00D3" => "O"
-
-# Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
-"\u00D4" => "O"
-
-# Õ [LATIN CAPITAL LETTER O WITH TILDE]
-"\u00D5" => "O"
-
-# Ö [LATIN CAPITAL LETTER O WITH DIAERESIS]
-"\u00D6" => "O"
-
-# Ø [LATIN CAPITAL LETTER O WITH STROKE]
-"\u00D8" => "O"
-
-# Ō [LATIN CAPITAL LETTER O WITH MACRON]
-"\u014C" => "O"
-
-# Ŏ [LATIN CAPITAL LETTER O WITH BREVE]
-"\u014E" => "O"
-
-# Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
-"\u0150" => "O"
-
-# Ɔ [LATIN CAPITAL LETTER OPEN O]
-"\u0186" => "O"
-
-# Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
-"\u019F" => "O"
-
-# Ơ [LATIN CAPITAL LETTER O WITH HORN]
-"\u01A0" => "O"
-
-# Ǒ [LATIN CAPITAL LETTER O WITH CARON]
-"\u01D1" => "O"
-
-# Ǫ [LATIN CAPITAL LETTER O WITH OGONEK]
-"\u01EA" => "O"
-
-# Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
-"\u01EC" => "O"
-
-# Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
-"\u01FE" => "O"
-
-# Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
-"\u020C" => "O"
-
-# Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
-"\u020E" => "O"
-
-# Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
-"\u022A" => "O"
-
-# Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
-"\u022C" => "O"
-
-# Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE]
-"\u022E" => "O"
-
-# Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
-"\u0230" => "O"
-
-# ᴏ [LATIN LETTER SMALL CAPITAL O]
-"\u1D0F" => "O"
-
-# ᴐ [LATIN LETTER SMALL CAPITAL OPEN O]
-"\u1D10" => "O"
-
-# Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
-"\u1E4C" => "O"
-
-# Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
-"\u1E4E" => "O"
-
-# Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
-"\u1E50" => "O"
-
-# Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
-"\u1E52" => "O"
-
-# Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW]
-"\u1ECC" => "O"
-
-# Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
-"\u1ECE" => "O"
-
-# Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
-"\u1ED0" => "O"
-
-# Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
-"\u1ED2" => "O"
-
-# Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1ED4" => "O"
-
-# Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
-"\u1ED6" => "O"
-
-# Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
-"\u1ED8" => "O"
-
-# Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
-"\u1EDA" => "O"
-
-# Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
-"\u1EDC" => "O"
-
-# Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
-"\u1EDE" => "O"
-
-# Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
-"\u1EE0" => "O"
-
-# Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
-"\u1EE2" => "O"
-
-# Ⓞ [CIRCLED LATIN CAPITAL LETTER O]
-"\u24C4" => "O"
-
-# Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
-"\uA74A" => "O"
-
-# Ꝍ [LATIN CAPITAL LETTER O WITH LOOP]
-"\uA74C" => "O"
-
-# O [FULLWIDTH LATIN CAPITAL LETTER O]
-"\uFF2F" => "O"
-
-# ò [LATIN SMALL LETTER O WITH GRAVE]
-"\u00F2" => "o"
-
-# ó [LATIN SMALL LETTER O WITH ACUTE]
-"\u00F3" => "o"
-
-# ô [LATIN SMALL LETTER O WITH CIRCUMFLEX]
-"\u00F4" => "o"
-
-# õ [LATIN SMALL LETTER O WITH TILDE]
-"\u00F5" => "o"
-
-# ö [LATIN SMALL LETTER O WITH DIAERESIS]
-"\u00F6" => "o"
-
-# ø [LATIN SMALL LETTER O WITH STROKE]
-"\u00F8" => "o"
-
-# ō [LATIN SMALL LETTER O WITH MACRON]
-"\u014D" => "o"
-
-# ŏ [LATIN SMALL LETTER O WITH BREVE]
-"\u014F" => "o"
-
-# ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
-"\u0151" => "o"
-
-# ơ [LATIN SMALL LETTER O WITH HORN]
-"\u01A1" => "o"
-
-# ǒ [LATIN SMALL LETTER O WITH CARON]
-"\u01D2" => "o"
-
-# ǫ [LATIN SMALL LETTER O WITH OGONEK]
-"\u01EB" => "o"
-
-# ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
-"\u01ED" => "o"
-
-# ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
-"\u01FF" => "o"
-
-# ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
-"\u020D" => "o"
-
-# ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE]
-"\u020F" => "o"
-
-# ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
-"\u022B" => "o"
-
-# ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON]
-"\u022D" => "o"
-
-# ȯ [LATIN SMALL LETTER O WITH DOT ABOVE]
-"\u022F" => "o"
-
-# ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
-"\u0231" => "o"
-
-# ɔ [LATIN SMALL LETTER OPEN O]
-"\u0254" => "o"
-
-# ɵ [LATIN SMALL LETTER BARRED O]
-"\u0275" => "o"
-
-# ᴖ [LATIN SMALL LETTER TOP HALF O]
-"\u1D16" => "o"
-
-# ᴗ [LATIN SMALL LETTER BOTTOM HALF O]
-"\u1D17" => "o"
-
-# ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
-"\u1D97" => "o"
-
-# ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
-"\u1E4D" => "o"
-
-# ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
-"\u1E4F" => "o"
-
-# ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
-"\u1E51" => "o"
-
-# ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
-"\u1E53" => "o"
-
-# ọ [LATIN SMALL LETTER O WITH DOT BELOW]
-"\u1ECD" => "o"
-
-# ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE]
-"\u1ECF" => "o"
-
-# ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
-"\u1ED1" => "o"
-
-# ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
-"\u1ED3" => "o"
-
-# ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1ED5" => "o"
-
-# ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
-"\u1ED7" => "o"
-
-# ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
-"\u1ED9" => "o"
-
-# ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE]
-"\u1EDB" => "o"
-
-# ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE]
-"\u1EDD" => "o"
-
-# ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
-"\u1EDF" => "o"
-
-# ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE]
-"\u1EE1" => "o"
-
-# ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
-"\u1EE3" => "o"
-
-# ₒ [LATIN SUBSCRIPT SMALL LETTER O]
-"\u2092" => "o"
-
-# ⓞ [CIRCLED LATIN SMALL LETTER O]
-"\u24DE" => "o"
-
-# ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE]
-"\u2C7A" => "o"
-
-# ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
-"\uA74B" => "o"
-
-# ꝍ [LATIN SMALL LETTER O WITH LOOP]
-"\uA74D" => "o"
-
-# o [FULLWIDTH LATIN SMALL LETTER O]
-"\uFF4F" => "o"
-
-# Œ [LATIN CAPITAL LIGATURE OE]
-"\u0152" => "OE"
-
-# ɶ [LATIN LETTER SMALL CAPITAL OE]
-"\u0276" => "OE"
-
-# Ꝏ [LATIN CAPITAL LETTER OO]
-"\uA74E" => "OO"
-
-# Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU]
-"\u0222" => "OU"
-
-# ᴕ [LATIN LETTER SMALL CAPITAL OU]
-"\u1D15" => "OU"
-
-# ⒪ [PARENTHESIZED LATIN SMALL LETTER O]
-"\u24AA" => "(o)"
-
-# œ [LATIN SMALL LIGATURE OE]
-"\u0153" => "oe"
-
-# ᴔ [LATIN SMALL LETTER TURNED OE]
-"\u1D14" => "oe"
-
-# ꝏ [LATIN SMALL LETTER OO]
-"\uA74F" => "oo"
-
-# ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU]
-"\u0223" => "ou"
-
-# Ƥ [LATIN CAPITAL LETTER P WITH HOOK]
-"\u01A4" => "P"
-
-# ᴘ [LATIN LETTER SMALL CAPITAL P]
-"\u1D18" => "P"
-
-# Ṕ [LATIN CAPITAL LETTER P WITH ACUTE]
-"\u1E54" => "P"
-
-# Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE]
-"\u1E56" => "P"
-
-# Ⓟ [CIRCLED LATIN CAPITAL LETTER P]
-"\u24C5" => "P"
-
-# Ᵽ [LATIN CAPITAL LETTER P WITH STROKE]
-"\u2C63" => "P"
-
-# Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
-"\uA750" => "P"
-
-# Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH]
-"\uA752" => "P"
-
-# Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
-"\uA754" => "P"
-
-# P [FULLWIDTH LATIN CAPITAL LETTER P]
-"\uFF30" => "P"
-
-# ƥ [LATIN SMALL LETTER P WITH HOOK]
-"\u01A5" => "p"
-
-# ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE]
-"\u1D71" => "p"
-
-# ᵽ [LATIN SMALL LETTER P WITH STROKE]
-"\u1D7D" => "p"
-
-# ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK]
-"\u1D88" => "p"
-
-# ṕ [LATIN SMALL LETTER P WITH ACUTE]
-"\u1E55" => "p"
-
-# ṗ [LATIN SMALL LETTER P WITH DOT ABOVE]
-"\u1E57" => "p"
-
-# ⓟ [CIRCLED LATIN SMALL LETTER P]
-"\u24DF" => "p"
-
-# ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
-"\uA751" => "p"
-
-# ꝓ [LATIN SMALL LETTER P WITH FLOURISH]
-"\uA753" => "p"
-
-# ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
-"\uA755" => "p"
-
-# ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P]
-"\uA7FC" => "p"
-
-# p [FULLWIDTH LATIN SMALL LETTER P]
-"\uFF50" => "p"
-
-# ⒫ [PARENTHESIZED LATIN SMALL LETTER P]
-"\u24AB" => "(p)"
-
-# Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
-"\u024A" => "Q"
-
-# Ⓠ [CIRCLED LATIN CAPITAL LETTER Q]
-"\u24C6" => "Q"
-
-# Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
-"\uA756" => "Q"
-
-# Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
-"\uA758" => "Q"
-
-# Q [FULLWIDTH LATIN CAPITAL LETTER Q]
-"\uFF31" => "Q"
-
-# ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA]
-"\u0138" => "q"
-
-# ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL]
-"\u024B" => "q"
-
-# ʠ [LATIN SMALL LETTER Q WITH HOOK]
-"\u02A0" => "q"
-
-# ⓠ [CIRCLED LATIN SMALL LETTER Q]
-"\u24E0" => "q"
-
-# ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
-"\uA757" => "q"
-
-# ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
-"\uA759" => "q"
-
-# q [FULLWIDTH LATIN SMALL LETTER Q]
-"\uFF51" => "q"
-
-# ⒬ [PARENTHESIZED LATIN SMALL LETTER Q]
-"\u24AC" => "(q)"
-
-# ȹ [LATIN SMALL LETTER QP DIGRAPH]
-"\u0239" => "qp"
-
-# Ŕ [LATIN CAPITAL LETTER R WITH ACUTE]
-"\u0154" => "R"
-
-# Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA]
-"\u0156" => "R"
-
-# Ř [LATIN CAPITAL LETTER R WITH CARON]
-"\u0158" => "R"
-
-# Ȓ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
-"\u0210" => "R"
-
-# Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
-"\u0212" => "R"
-
-# Ɍ [LATIN CAPITAL LETTER R WITH STROKE]
-"\u024C" => "R"
-
-# ʀ [LATIN LETTER SMALL CAPITAL R]
-"\u0280" => "R"
-
-# ʁ [LATIN LETTER SMALL CAPITAL INVERTED R]
-"\u0281" => "R"
-
-# ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R]
-"\u1D19" => "R"
-
-# ᴚ [LATIN LETTER SMALL CAPITAL TURNED R]
-"\u1D1A" => "R"
-
-# Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE]
-"\u1E58" => "R"
-
-# Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW]
-"\u1E5A" => "R"
-
-# Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
-"\u1E5C" => "R"
-
-# Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW]
-"\u1E5E" => "R"
-
-# Ⓡ [CIRCLED LATIN CAPITAL LETTER R]
-"\u24C7" => "R"
-
-# Ɽ [LATIN CAPITAL LETTER R WITH TAIL]
-"\u2C64" => "R"
-
-# Ꝛ [LATIN CAPITAL LETTER R ROTUNDA]
-"\uA75A" => "R"
-
-# Ꞃ [LATIN CAPITAL LETTER INSULAR R]
-"\uA782" => "R"
-
-# R [FULLWIDTH LATIN CAPITAL LETTER R]
-"\uFF32" => "R"
-
-# ŕ [LATIN SMALL LETTER R WITH ACUTE]
-"\u0155" => "r"
-
-# ŗ [LATIN SMALL LETTER R WITH CEDILLA]
-"\u0157" => "r"
-
-# ř [LATIN SMALL LETTER R WITH CARON]
-"\u0159" => "r"
-
-# ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
-"\u0211" => "r"
-
-# ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE]
-"\u0213" => "r"
-
-# ɍ [LATIN SMALL LETTER R WITH STROKE]
-"\u024D" => "r"
-
-# ɼ [LATIN SMALL LETTER R WITH LONG LEG]
-"\u027C" => "r"
-
-# ɽ [LATIN SMALL LETTER R WITH TAIL]
-"\u027D" => "r"
-
-# ɾ [LATIN SMALL LETTER R WITH FISHHOOK]
-"\u027E" => "r"
-
-# ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
-"\u027F" => "r"
-
-# ᵣ [LATIN SUBSCRIPT SMALL LETTER R]
-"\u1D63" => "r"
-
-# ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE]
-"\u1D72" => "r"
-
-# ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
-"\u1D73" => "r"
-
-# ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK]
-"\u1D89" => "r"
-
-# ṙ [LATIN SMALL LETTER R WITH DOT ABOVE]
-"\u1E59" => "r"
-
-# ṛ [LATIN SMALL LETTER R WITH DOT BELOW]
-"\u1E5B" => "r"
-
-# ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
-"\u1E5D" => "r"
-
-# ṟ [LATIN SMALL LETTER R WITH LINE BELOW]
-"\u1E5F" => "r"
-
-# ⓡ [CIRCLED LATIN SMALL LETTER R]
-"\u24E1" => "r"
-
-# ꝛ [LATIN SMALL LETTER R ROTUNDA]
-"\uA75B" => "r"
-
-# ꞃ [LATIN SMALL LETTER INSULAR R]
-"\uA783" => "r"
-
-# r [FULLWIDTH LATIN SMALL LETTER R]
-"\uFF52" => "r"
-
-# ⒭ [PARENTHESIZED LATIN SMALL LETTER R]
-"\u24AD" => "(r)"
-
-# Ś [LATIN CAPITAL LETTER S WITH ACUTE]
-"\u015A" => "S"
-
-# Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
-"\u015C" => "S"
-
-# Ş [LATIN CAPITAL LETTER S WITH CEDILLA]
-"\u015E" => "S"
-
-# Š [LATIN CAPITAL LETTER S WITH CARON]
-"\u0160" => "S"
-
-# Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW]
-"\u0218" => "S"
-
-# Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE]
-"\u1E60" => "S"
-
-# Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW]
-"\u1E62" => "S"
-
-# Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
-"\u1E64" => "S"
-
-# Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
-"\u1E66" => "S"
-
-# Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
-"\u1E68" => "S"
-
-# Ⓢ [CIRCLED LATIN CAPITAL LETTER S]
-"\u24C8" => "S"
-
-# ꜱ [LATIN LETTER SMALL CAPITAL S]
-"\uA731" => "S"
-
-# ꞅ [LATIN SMALL LETTER INSULAR S]
-"\uA785" => "S"
-
-# S [FULLWIDTH LATIN CAPITAL LETTER S]
-"\uFF33" => "S"
-
-# ś [LATIN SMALL LETTER S WITH ACUTE]
-"\u015B" => "s"
-
-# ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX]
-"\u015D" => "s"
-
-# ş [LATIN SMALL LETTER S WITH CEDILLA]
-"\u015F" => "s"
-
-# š [LATIN SMALL LETTER S WITH CARON]
-"\u0161" => "s"
-
-# ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S]
-"\u017F" => "s"
-
-# ș [LATIN SMALL LETTER S WITH COMMA BELOW]
-"\u0219" => "s"
-
-# ȿ [LATIN SMALL LETTER S WITH SWASH TAIL]
-"\u023F" => "s"
-
-# ʂ [LATIN SMALL LETTER S WITH HOOK]
-"\u0282" => "s"
-
-# ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE]
-"\u1D74" => "s"
-
-# ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK]
-"\u1D8A" => "s"
-
-# ṡ [LATIN SMALL LETTER S WITH DOT ABOVE]
-"\u1E61" => "s"
-
-# ṣ [LATIN SMALL LETTER S WITH DOT BELOW]
-"\u1E63" => "s"
-
-# ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
-"\u1E65" => "s"
-
-# ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
-"\u1E67" => "s"
-
-# ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
-"\u1E69" => "s"
-
-# ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
-"\u1E9C" => "s"
-
-# ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
-"\u1E9D" => "s"
-
-# ⓢ [CIRCLED LATIN SMALL LETTER S]
-"\u24E2" => "s"
-
-# Ꞅ [LATIN CAPITAL LETTER INSULAR S]
-"\uA784" => "s"
-
-# s [FULLWIDTH LATIN SMALL LETTER S]
-"\uFF53" => "s"
-
-# ẞ [LATIN CAPITAL LETTER SHARP S]
-"\u1E9E" => "SS"
-
-# ⒮ [PARENTHESIZED LATIN SMALL LETTER S]
-"\u24AE" => "(s)"
-
-# ß [LATIN SMALL LETTER SHARP S]
-"\u00DF" => "ss"
-
-# st [LATIN SMALL LIGATURE ST]
-"\uFB06" => "st"
-
-# Ţ [LATIN CAPITAL LETTER T WITH CEDILLA]
-"\u0162" => "T"
-
-# Ť [LATIN CAPITAL LETTER T WITH CARON]
-"\u0164" => "T"
-
-# Ŧ [LATIN CAPITAL LETTER T WITH STROKE]
-"\u0166" => "T"
-
-# Ƭ [LATIN CAPITAL LETTER T WITH HOOK]
-"\u01AC" => "T"
-
-# Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
-"\u01AE" => "T"
-
-# Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW]
-"\u021A" => "T"
-
-# Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
-"\u023E" => "T"
-
-# ᴛ [LATIN LETTER SMALL CAPITAL T]
-"\u1D1B" => "T"
-
-# Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE]
-"\u1E6A" => "T"
-
-# Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW]
-"\u1E6C" => "T"
-
-# Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW]
-"\u1E6E" => "T"
-
-# Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
-"\u1E70" => "T"
-
-# Ⓣ [CIRCLED LATIN CAPITAL LETTER T]
-"\u24C9" => "T"
-
-# Ꞇ [LATIN CAPITAL LETTER INSULAR T]
-"\uA786" => "T"
-
-# T [FULLWIDTH LATIN CAPITAL LETTER T]
-"\uFF34" => "T"
-
-# ţ [LATIN SMALL LETTER T WITH CEDILLA]
-"\u0163" => "t"
-
-# ť [LATIN SMALL LETTER T WITH CARON]
-"\u0165" => "t"
-
-# ŧ [LATIN SMALL LETTER T WITH STROKE]
-"\u0167" => "t"
-
-# ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK]
-"\u01AB" => "t"
-
-# ƭ [LATIN SMALL LETTER T WITH HOOK]
-"\u01AD" => "t"
-
-# ț [LATIN SMALL LETTER T WITH COMMA BELOW]
-"\u021B" => "t"
-
-# ȶ [LATIN SMALL LETTER T WITH CURL]
-"\u0236" => "t"
-
-# ʇ [LATIN SMALL LETTER TURNED T]
-"\u0287" => "t"
-
-# ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
-"\u0288" => "t"
-
-# ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE]
-"\u1D75" => "t"
-
-# ṫ [LATIN SMALL LETTER T WITH DOT ABOVE]
-"\u1E6B" => "t"
-
-# ṭ [LATIN SMALL LETTER T WITH DOT BELOW]
-"\u1E6D" => "t"
-
-# ṯ [LATIN SMALL LETTER T WITH LINE BELOW]
-"\u1E6F" => "t"
-
-# ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
-"\u1E71" => "t"
-
-# ẗ [LATIN SMALL LETTER T WITH DIAERESIS]
-"\u1E97" => "t"
-
-# ⓣ [CIRCLED LATIN SMALL LETTER T]
-"\u24E3" => "t"
-
-# ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
-"\u2C66" => "t"
-
-# t [FULLWIDTH LATIN SMALL LETTER T]
-"\uFF54" => "t"
-
-# Þ [LATIN CAPITAL LETTER THORN]
-"\u00DE" => "TH"
-
-# Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
-"\uA766" => "TH"
-
-# Ꜩ [LATIN CAPITAL LETTER TZ]
-"\uA728" => "TZ"
-
-# ⒯ [PARENTHESIZED LATIN SMALL LETTER T]
-"\u24AF" => "(t)"
-
-# ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
-"\u02A8" => "tc"
-
-# þ [LATIN SMALL LETTER THORN]
-"\u00FE" => "th"
-
-# ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
-"\u1D7A" => "th"
-
-# ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
-"\uA767" => "th"
-
-# ʦ [LATIN SMALL LETTER TS DIGRAPH]
-"\u02A6" => "ts"
-
-# ꜩ [LATIN SMALL LETTER TZ]
-"\uA729" => "tz"
-
-# Ù [LATIN CAPITAL LETTER U WITH GRAVE]
-"\u00D9" => "U"
-
-# Ú [LATIN CAPITAL LETTER U WITH ACUTE]
-"\u00DA" => "U"
-
-# Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
-"\u00DB" => "U"
-
-# Ü [LATIN CAPITAL LETTER U WITH DIAERESIS]
-"\u00DC" => "U"
-
-# Ũ [LATIN CAPITAL LETTER U WITH TILDE]
-"\u0168" => "U"
-
-# Ū [LATIN CAPITAL LETTER U WITH MACRON]
-"\u016A" => "U"
-
-# Ŭ [LATIN CAPITAL LETTER U WITH BREVE]
-"\u016C" => "U"
-
-# Ů [LATIN CAPITAL LETTER U WITH RING ABOVE]
-"\u016E" => "U"
-
-# Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
-"\u0170" => "U"
-
-# Ų [LATIN CAPITAL LETTER U WITH OGONEK]
-"\u0172" => "U"
-
-# Ư [LATIN CAPITAL LETTER U WITH HORN]
-"\u01AF" => "U"
-
-# Ǔ [LATIN CAPITAL LETTER U WITH CARON]
-"\u01D3" => "U"
-
-# Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
-"\u01D5" => "U"
-
-# Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
-"\u01D7" => "U"
-
-# Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
-"\u01D9" => "U"
-
-# Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
-"\u01DB" => "U"
-
-# Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
-"\u0214" => "U"
-
-# Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
-"\u0216" => "U"
-
-# Ʉ [LATIN CAPITAL LETTER U BAR]
-"\u0244" => "U"
-
-# ᴜ [LATIN LETTER SMALL CAPITAL U]
-"\u1D1C" => "U"
-
-# ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE]
-"\u1D7E" => "U"
-
-# Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
-"\u1E72" => "U"
-
-# Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW]
-"\u1E74" => "U"
-
-# Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
-"\u1E76" => "U"
-
-# Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
-"\u1E78" => "U"
-
-# Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
-"\u1E7A" => "U"
-
-# Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW]
-"\u1EE4" => "U"
-
-# Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
-"\u1EE6" => "U"
-
-# Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
-"\u1EE8" => "U"
-
-# Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
-"\u1EEA" => "U"
-
-# Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
-"\u1EEC" => "U"
-
-# Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
-"\u1EEE" => "U"
-
-# Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
-"\u1EF0" => "U"
-
-# Ⓤ [CIRCLED LATIN CAPITAL LETTER U]
-"\u24CA" => "U"
-
-# U [FULLWIDTH LATIN CAPITAL LETTER U]
-"\uFF35" => "U"
-
-# ù [LATIN SMALL LETTER U WITH GRAVE]
-"\u00F9" => "u"
-
-# ú [LATIN SMALL LETTER U WITH ACUTE]
-"\u00FA" => "u"
-
-# û [LATIN SMALL LETTER U WITH CIRCUMFLEX]
-"\u00FB" => "u"
-
-# ü [LATIN SMALL LETTER U WITH DIAERESIS]
-"\u00FC" => "u"
-
-# ũ [LATIN SMALL LETTER U WITH TILDE]
-"\u0169" => "u"
-
-# ū [LATIN SMALL LETTER U WITH MACRON]
-"\u016B" => "u"
-
-# ŭ [LATIN SMALL LETTER U WITH BREVE]
-"\u016D" => "u"
-
-# ů [LATIN SMALL LETTER U WITH RING ABOVE]
-"\u016F" => "u"
-
-# ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
-"\u0171" => "u"
-
-# ų [LATIN SMALL LETTER U WITH OGONEK]
-"\u0173" => "u"
-
-# ư [LATIN SMALL LETTER U WITH HORN]
-"\u01B0" => "u"
-
-# ǔ [LATIN SMALL LETTER U WITH CARON]
-"\u01D4" => "u"
-
-# ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
-"\u01D6" => "u"
-
-# ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
-"\u01D8" => "u"
-
-# ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
-"\u01DA" => "u"
-
-# ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
-"\u01DC" => "u"
-
-# ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
-"\u0215" => "u"
-
-# ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE]
-"\u0217" => "u"
-
-# ʉ [LATIN SMALL LETTER U BAR]
-"\u0289" => "u"
-
-# ᵤ [LATIN SUBSCRIPT SMALL LETTER U]
-"\u1D64" => "u"
-
-# ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
-"\u1D99" => "u"
-
-# ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
-"\u1E73" => "u"
-
-# ṵ [LATIN SMALL LETTER U WITH TILDE BELOW]
-"\u1E75" => "u"
-
-# ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
-"\u1E77" => "u"
-
-# ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
-"\u1E79" => "u"
-
-# ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
-"\u1E7B" => "u"
-
-# ụ [LATIN SMALL LETTER U WITH DOT BELOW]
-"\u1EE5" => "u"
-
-# ủ [LATIN SMALL LETTER U WITH HOOK ABOVE]
-"\u1EE7" => "u"
-
-# ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE]
-"\u1EE9" => "u"
-
-# ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE]
-"\u1EEB" => "u"
-
-# ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
-"\u1EED" => "u"
-
-# ữ [LATIN SMALL LETTER U WITH HORN AND TILDE]
-"\u1EEF" => "u"
-
-# ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
-"\u1EF1" => "u"
-
-# ⓤ [CIRCLED LATIN SMALL LETTER U]
-"\u24E4" => "u"
-
-# u [FULLWIDTH LATIN SMALL LETTER U]
-"\uFF55" => "u"
-
-# ⒰ [PARENTHESIZED LATIN SMALL LETTER U]
-"\u24B0" => "(u)"
-
-# ᵫ [LATIN SMALL LETTER UE]
-"\u1D6B" => "ue"
-
-# Ʋ [LATIN CAPITAL LETTER V WITH HOOK]
-"\u01B2" => "V"
-
-# Ʌ [LATIN CAPITAL LETTER TURNED V]
-"\u0245" => "V"
-
-# ᴠ [LATIN LETTER SMALL CAPITAL V]
-"\u1D20" => "V"
-
-# Ṽ [LATIN CAPITAL LETTER V WITH TILDE]
-"\u1E7C" => "V"
-
-# Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW]
-"\u1E7E" => "V"
-
-# Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V]
-"\u1EFC" => "V"
-
-# Ⓥ [CIRCLED LATIN CAPITAL LETTER V]
-"\u24CB" => "V"
-
-# Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
-"\uA75E" => "V"
-
-# Ꝩ [LATIN CAPITAL LETTER VEND]
-"\uA768" => "V"
-
-# V [FULLWIDTH LATIN CAPITAL LETTER V]
-"\uFF36" => "V"
-
-# ʋ [LATIN SMALL LETTER V WITH HOOK]
-"\u028B" => "v"
-
-# ʌ [LATIN SMALL LETTER TURNED V]
-"\u028C" => "v"
-
-# ᵥ [LATIN SUBSCRIPT SMALL LETTER V]
-"\u1D65" => "v"
-
-# ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK]
-"\u1D8C" => "v"
-
-# ṽ [LATIN SMALL LETTER V WITH TILDE]
-"\u1E7D" => "v"
-
-# ṿ [LATIN SMALL LETTER V WITH DOT BELOW]
-"\u1E7F" => "v"
-
-# ⓥ [CIRCLED LATIN SMALL LETTER V]
-"\u24E5" => "v"
-
-# ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK]
-"\u2C71" => "v"
-
-# ⱴ [LATIN SMALL LETTER V WITH CURL]
-"\u2C74" => "v"
-
-# ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
-"\uA75F" => "v"
-
-# v [FULLWIDTH LATIN SMALL LETTER V]
-"\uFF56" => "v"
-
-# Ꝡ [LATIN CAPITAL LETTER VY]
-"\uA760" => "VY"
-
-# ⒱ [PARENTHESIZED LATIN SMALL LETTER V]
-"\u24B1" => "(v)"
-
-# ꝡ [LATIN SMALL LETTER VY]
-"\uA761" => "vy"
-
-# Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
-"\u0174" => "W"
-
-# Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN]
-"\u01F7" => "W"
-
-# ᴡ [LATIN LETTER SMALL CAPITAL W]
-"\u1D21" => "W"
-
-# Ẁ [LATIN CAPITAL LETTER W WITH GRAVE]
-"\u1E80" => "W"
-
-# Ẃ [LATIN CAPITAL LETTER W WITH ACUTE]
-"\u1E82" => "W"
-
-# Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS]
-"\u1E84" => "W"
-
-# Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE]
-"\u1E86" => "W"
-
-# Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW]
-"\u1E88" => "W"
-
-# Ⓦ [CIRCLED LATIN CAPITAL LETTER W]
-"\u24CC" => "W"
-
-# Ⱳ [LATIN CAPITAL LETTER W WITH HOOK]
-"\u2C72" => "W"
-
-# W [FULLWIDTH LATIN CAPITAL LETTER W]
-"\uFF37" => "W"
-
-# ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX]
-"\u0175" => "w"
-
-# ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN]
-"\u01BF" => "w"
-
-# ʍ [LATIN SMALL LETTER TURNED W]
-"\u028D" => "w"
-
-# ẁ [LATIN SMALL LETTER W WITH GRAVE]
-"\u1E81" => "w"
-
-# ẃ [LATIN SMALL LETTER W WITH ACUTE]
-"\u1E83" => "w"
-
-# ẅ [LATIN SMALL LETTER W WITH DIAERESIS]
-"\u1E85" => "w"
-
-# ẇ [LATIN SMALL LETTER W WITH DOT ABOVE]
-"\u1E87" => "w"
-
-# ẉ [LATIN SMALL LETTER W WITH DOT BELOW]
-"\u1E89" => "w"
-
-# ẘ [LATIN SMALL LETTER W WITH RING ABOVE]
-"\u1E98" => "w"
-
-# ⓦ [CIRCLED LATIN SMALL LETTER W]
-"\u24E6" => "w"
-
-# ⱳ [LATIN SMALL LETTER W WITH HOOK]
-"\u2C73" => "w"
-
-# w [FULLWIDTH LATIN SMALL LETTER W]
-"\uFF57" => "w"
-
-# ⒲ [PARENTHESIZED LATIN SMALL LETTER W]
-"\u24B2" => "(w)"
-
-# Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE]
-"\u1E8A" => "X"
-
-# Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS]
-"\u1E8C" => "X"
-
-# Ⓧ [CIRCLED LATIN CAPITAL LETTER X]
-"\u24CD" => "X"
-
-# X [FULLWIDTH LATIN CAPITAL LETTER X]
-"\uFF38" => "X"
-
-# ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK]
-"\u1D8D" => "x"
-
-# ẋ [LATIN SMALL LETTER X WITH DOT ABOVE]
-"\u1E8B" => "x"
-
-# ẍ [LATIN SMALL LETTER X WITH DIAERESIS]
-"\u1E8D" => "x"
-
-# ₓ [LATIN SUBSCRIPT SMALL LETTER X]
-"\u2093" => "x"
-
-# ⓧ [CIRCLED LATIN SMALL LETTER X]
-"\u24E7" => "x"
-
-# x [FULLWIDTH LATIN SMALL LETTER X]
-"\uFF58" => "x"
-
-# ⒳ [PARENTHESIZED LATIN SMALL LETTER X]
-"\u24B3" => "(x)"
-
-# Ý [LATIN CAPITAL LETTER Y WITH ACUTE]
-"\u00DD" => "Y"
-
-# Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
-"\u0176" => "Y"
-
-# Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS]
-"\u0178" => "Y"
-
-# Ƴ [LATIN CAPITAL LETTER Y WITH HOOK]
-"\u01B3" => "Y"
-
-# Ȳ [LATIN CAPITAL LETTER Y WITH MACRON]
-"\u0232" => "Y"
-
-# Ɏ [LATIN CAPITAL LETTER Y WITH STROKE]
-"\u024E" => "Y"
-
-# ʏ [LATIN LETTER SMALL CAPITAL Y]
-"\u028F" => "Y"
-
-# Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
-"\u1E8E" => "Y"
-
-# Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE]
-"\u1EF2" => "Y"
-
-# Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW]
-"\u1EF4" => "Y"
-
-# Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
-"\u1EF6" => "Y"
-
-# Ỹ [LATIN CAPITAL LETTER Y WITH TILDE]
-"\u1EF8" => "Y"
-
-# Ỿ [LATIN CAPITAL LETTER Y WITH LOOP]
-"\u1EFE" => "Y"
-
-# Ⓨ [CIRCLED LATIN CAPITAL LETTER Y]
-"\u24CE" => "Y"
-
-# Y [FULLWIDTH LATIN CAPITAL LETTER Y]
-"\uFF39" => "Y"
-
-# ý [LATIN SMALL LETTER Y WITH ACUTE]
-"\u00FD" => "y"
-
-# ÿ [LATIN SMALL LETTER Y WITH DIAERESIS]
-"\u00FF" => "y"
-
-# ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
-"\u0177" => "y"
-
-# ƴ [LATIN SMALL LETTER Y WITH HOOK]
-"\u01B4" => "y"
-
-# ȳ [LATIN SMALL LETTER Y WITH MACRON]
-"\u0233" => "y"
-
-# ɏ [LATIN SMALL LETTER Y WITH STROKE]
-"\u024F" => "y"
-
-# ʎ [LATIN SMALL LETTER TURNED Y]
-"\u028E" => "y"
-
-# ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE]
-"\u1E8F" => "y"
-
-# ẙ [LATIN SMALL LETTER Y WITH RING ABOVE]
-"\u1E99" => "y"
-
-# ỳ [LATIN SMALL LETTER Y WITH GRAVE]
-"\u1EF3" => "y"
-
-# ỵ [LATIN SMALL LETTER Y WITH DOT BELOW]
-"\u1EF5" => "y"
-
-# ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE]
-"\u1EF7" => "y"
-
-# ỹ [LATIN SMALL LETTER Y WITH TILDE]
-"\u1EF9" => "y"
-
-# ỿ [LATIN SMALL LETTER Y WITH LOOP]
-"\u1EFF" => "y"
-
-# ⓨ [CIRCLED LATIN SMALL LETTER Y]
-"\u24E8" => "y"
-
-# y [FULLWIDTH LATIN SMALL LETTER Y]
-"\uFF59" => "y"
-
-# ⒴ [PARENTHESIZED LATIN SMALL LETTER Y]
-"\u24B4" => "(y)"
-
-# Ź [LATIN CAPITAL LETTER Z WITH ACUTE]
-"\u0179" => "Z"
-
-# Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
-"\u017B" => "Z"
-
-# Ž [LATIN CAPITAL LETTER Z WITH CARON]
-"\u017D" => "Z"
-
-# Ƶ [LATIN CAPITAL LETTER Z WITH STROKE]
-"\u01B5" => "Z"
-
-# Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH]
-"\u021C" => "Z"
-
-# Ȥ [LATIN CAPITAL LETTER Z WITH HOOK]
-"\u0224" => "Z"
-
-# ᴢ [LATIN LETTER SMALL CAPITAL Z]
-"\u1D22" => "Z"
-
-# Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
-"\u1E90" => "Z"
-
-# Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW]
-"\u1E92" => "Z"
-
-# Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW]
-"\u1E94" => "Z"
-
-# Ⓩ [CIRCLED LATIN CAPITAL LETTER Z]
-"\u24CF" => "Z"
-
-# Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER]
-"\u2C6B" => "Z"
-
-# Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z]
-"\uA762" => "Z"
-
-# Z [FULLWIDTH LATIN CAPITAL LETTER Z]
-"\uFF3A" => "Z"
-
-# ź [LATIN SMALL LETTER Z WITH ACUTE]
-"\u017A" => "z"
-
-# ż [LATIN SMALL LETTER Z WITH DOT ABOVE]
-"\u017C" => "z"
-
-# ž [LATIN SMALL LETTER Z WITH CARON]
-"\u017E" => "z"
-
-# ƶ [LATIN SMALL LETTER Z WITH STROKE]
-"\u01B6" => "z"
-
-# ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH]
-"\u021D" => "z"
-
-# ȥ [LATIN SMALL LETTER Z WITH HOOK]
-"\u0225" => "z"
-
-# ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL]
-"\u0240" => "z"
-
-# ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
-"\u0290" => "z"
-
-# ʑ [LATIN SMALL LETTER Z WITH CURL]
-"\u0291" => "z"
-
-# ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
-"\u1D76" => "z"
-
-# ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK]
-"\u1D8E" => "z"
-
-# ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
-"\u1E91" => "z"
-
-# ẓ [LATIN SMALL LETTER Z WITH DOT BELOW]
-"\u1E93" => "z"
-
-# ẕ [LATIN SMALL LETTER Z WITH LINE BELOW]
-"\u1E95" => "z"
-
-# ⓩ [CIRCLED LATIN SMALL LETTER Z]
-"\u24E9" => "z"
-
-# ⱬ [LATIN SMALL LETTER Z WITH DESCENDER]
-"\u2C6C" => "z"
-
-# ꝣ [LATIN SMALL LETTER VISIGOTHIC Z]
-"\uA763" => "z"
-
-# z [FULLWIDTH LATIN SMALL LETTER Z]
-"\uFF5A" => "z"
-
-# ⒵ [PARENTHESIZED LATIN SMALL LETTER Z]
-"\u24B5" => "(z)"
-
-# ⁰ [SUPERSCRIPT ZERO]
-"\u2070" => "0"
-
-# ₀ [SUBSCRIPT ZERO]
-"\u2080" => "0"
-
-# ⓪ [CIRCLED DIGIT ZERO]
-"\u24EA" => "0"
-
-# ⓿ [NEGATIVE CIRCLED DIGIT ZERO]
-"\u24FF" => "0"
-
-# 0 [FULLWIDTH DIGIT ZERO]
-"\uFF10" => "0"
-
-# ¹ [SUPERSCRIPT ONE]
-"\u00B9" => "1"
-
-# ₁ [SUBSCRIPT ONE]
-"\u2081" => "1"
-
-# ① [CIRCLED DIGIT ONE]
-"\u2460" => "1"
-
-# ⓵ [DOUBLE CIRCLED DIGIT ONE]
-"\u24F5" => "1"
-
-# ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
-"\u2776" => "1"
-
-# ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
-"\u2780" => "1"
-
-# ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
-"\u278A" => "1"
-
-# 1 [FULLWIDTH DIGIT ONE]
-"\uFF11" => "1"
-
-# ⒈ [DIGIT ONE FULL STOP]
-"\u2488" => "1."
-
-# ⑴ [PARENTHESIZED DIGIT ONE]
-"\u2474" => "(1)"
-
-# ² [SUPERSCRIPT TWO]
-"\u00B2" => "2"
-
-# ₂ [SUBSCRIPT TWO]
-"\u2082" => "2"
-
-# ② [CIRCLED DIGIT TWO]
-"\u2461" => "2"
-
-# ⓶ [DOUBLE CIRCLED DIGIT TWO]
-"\u24F6" => "2"
-
-# ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
-"\u2777" => "2"
-
-# ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
-"\u2781" => "2"
-
-# ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
-"\u278B" => "2"
-
-# 2 [FULLWIDTH DIGIT TWO]
-"\uFF12" => "2"
-
-# ⒉ [DIGIT TWO FULL STOP]
-"\u2489" => "2."
-
-# ⑵ [PARENTHESIZED DIGIT TWO]
-"\u2475" => "(2)"
-
-# ³ [SUPERSCRIPT THREE]
-"\u00B3" => "3"
-
-# ₃ [SUBSCRIPT THREE]
-"\u2083" => "3"
-
-# ③ [CIRCLED DIGIT THREE]
-"\u2462" => "3"
-
-# ⓷ [DOUBLE CIRCLED DIGIT THREE]
-"\u24F7" => "3"
-
-# ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
-"\u2778" => "3"
-
-# ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
-"\u2782" => "3"
-
-# ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
-"\u278C" => "3"
-
-# 3 [FULLWIDTH DIGIT THREE]
-"\uFF13" => "3"
-
-# ⒊ [DIGIT THREE FULL STOP]
-"\u248A" => "3."
-
-# ⑶ [PARENTHESIZED DIGIT THREE]
-"\u2476" => "(3)"
-
-# ⁴ [SUPERSCRIPT FOUR]
-"\u2074" => "4"
-
-# ₄ [SUBSCRIPT FOUR]
-"\u2084" => "4"
-
-# ④ [CIRCLED DIGIT FOUR]
-"\u2463" => "4"
-
-# ⓸ [DOUBLE CIRCLED DIGIT FOUR]
-"\u24F8" => "4"
-
-# ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
-"\u2779" => "4"
-
-# ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
-"\u2783" => "4"
-
-# ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
-"\u278D" => "4"
-
-# 4 [FULLWIDTH DIGIT FOUR]
-"\uFF14" => "4"
-
-# ⒋ [DIGIT FOUR FULL STOP]
-"\u248B" => "4."
-
-# ⑷ [PARENTHESIZED DIGIT FOUR]
-"\u2477" => "(4)"
-
-# ⁵ [SUPERSCRIPT FIVE]
-"\u2075" => "5"
-
-# ₅ [SUBSCRIPT FIVE]
-"\u2085" => "5"
-
-# ⑤ [CIRCLED DIGIT FIVE]
-"\u2464" => "5"
-
-# ⓹ [DOUBLE CIRCLED DIGIT FIVE]
-"\u24F9" => "5"
-
-# ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
-"\u277A" => "5"
-
-# ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
-"\u2784" => "5"
-
-# ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
-"\u278E" => "5"
-
-# 5 [FULLWIDTH DIGIT FIVE]
-"\uFF15" => "5"
-
-# ⒌ [DIGIT FIVE FULL STOP]
-"\u248C" => "5."
-
-# ⑸ [PARENTHESIZED DIGIT FIVE]
-"\u2478" => "(5)"
-
-# ⁶ [SUPERSCRIPT SIX]
-"\u2076" => "6"
-
-# ₆ [SUBSCRIPT SIX]
-"\u2086" => "6"
-
-# ⑥ [CIRCLED DIGIT SIX]
-"\u2465" => "6"
-
-# ⓺ [DOUBLE CIRCLED DIGIT SIX]
-"\u24FA" => "6"
-
-# ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
-"\u277B" => "6"
-
-# ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
-"\u2785" => "6"
-
-# ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
-"\u278F" => "6"
-
-# 6 [FULLWIDTH DIGIT SIX]
-"\uFF16" => "6"
-
-# ⒍ [DIGIT SIX FULL STOP]
-"\u248D" => "6."
-
-# ⑹ [PARENTHESIZED DIGIT SIX]
-"\u2479" => "(6)"
-
-# ⁷ [SUPERSCRIPT SEVEN]
-"\u2077" => "7"
-
-# ₇ [SUBSCRIPT SEVEN]
-"\u2087" => "7"
-
-# ⑦ [CIRCLED DIGIT SEVEN]
-"\u2466" => "7"
-
-# ⓻ [DOUBLE CIRCLED DIGIT SEVEN]
-"\u24FB" => "7"
-
-# ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
-"\u277C" => "7"
-
-# ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
-"\u2786" => "7"
-
-# ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
-"\u2790" => "7"
-
-# 7 [FULLWIDTH DIGIT SEVEN]
-"\uFF17" => "7"
-
-# ⒎ [DIGIT SEVEN FULL STOP]
-"\u248E" => "7."
-
-# ⑺ [PARENTHESIZED DIGIT SEVEN]
-"\u247A" => "(7)"
-
-# ⁸ [SUPERSCRIPT EIGHT]
-"\u2078" => "8"
-
-# ₈ [SUBSCRIPT EIGHT]
-"\u2088" => "8"
-
-# ⑧ [CIRCLED DIGIT EIGHT]
-"\u2467" => "8"
-
-# ⓼ [DOUBLE CIRCLED DIGIT EIGHT]
-"\u24FC" => "8"
-
-# ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
-"\u277D" => "8"
-
-# ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
-"\u2787" => "8"
-
-# ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
-"\u2791" => "8"
-
-# 8 [FULLWIDTH DIGIT EIGHT]
-"\uFF18" => "8"
-
-# ⒏ [DIGIT EIGHT FULL STOP]
-"\u248F" => "8."
-
-# ⑻ [PARENTHESIZED DIGIT EIGHT]
-"\u247B" => "(8)"
-
-# ⁹ [SUPERSCRIPT NINE]
-"\u2079" => "9"
-
-# ₉ [SUBSCRIPT NINE]
-"\u2089" => "9"
-
-# ⑨ [CIRCLED DIGIT NINE]
-"\u2468" => "9"
-
-# ⓽ [DOUBLE CIRCLED DIGIT NINE]
-"\u24FD" => "9"
-
-# ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
-"\u277E" => "9"
-
-# ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
-"\u2788" => "9"
-
-# ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
-"\u2792" => "9"
-
-# 9 [FULLWIDTH DIGIT NINE]
-"\uFF19" => "9"
-
-# ⒐ [DIGIT NINE FULL STOP]
-"\u2490" => "9."
-
-# ⑼ [PARENTHESIZED DIGIT NINE]
-"\u247C" => "(9)"
-
-# ⑩ [CIRCLED NUMBER TEN]
-"\u2469" => "10"
-
-# ⓾ [DOUBLE CIRCLED NUMBER TEN]
-"\u24FE" => "10"
-
-# ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
-"\u277F" => "10"
-
-# ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
-"\u2789" => "10"
-
-# ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
-"\u2793" => "10"
-
-# ⒑ [NUMBER TEN FULL STOP]
-"\u2491" => "10."
-
-# ⑽ [PARENTHESIZED NUMBER TEN]
-"\u247D" => "(10)"
-
-# ⑪ [CIRCLED NUMBER ELEVEN]
-"\u246A" => "11"
-
-# ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN]
-"\u24EB" => "11"
-
-# ⒒ [NUMBER ELEVEN FULL STOP]
-"\u2492" => "11."
-
-# ⑾ [PARENTHESIZED NUMBER ELEVEN]
-"\u247E" => "(11)"
-
-# ⑫ [CIRCLED NUMBER TWELVE]
-"\u246B" => "12"
-
-# ⓬ [NEGATIVE CIRCLED NUMBER TWELVE]
-"\u24EC" => "12"
-
-# ⒓ [NUMBER TWELVE FULL STOP]
-"\u2493" => "12."
-
-# ⑿ [PARENTHESIZED NUMBER TWELVE]
-"\u247F" => "(12)"
-
-# ⑬ [CIRCLED NUMBER THIRTEEN]
-"\u246C" => "13"
-
-# ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN]
-"\u24ED" => "13"
-
-# ⒔ [NUMBER THIRTEEN FULL STOP]
-"\u2494" => "13."
-
-# ⒀ [PARENTHESIZED NUMBER THIRTEEN]
-"\u2480" => "(13)"
-
-# ⑭ [CIRCLED NUMBER FOURTEEN]
-"\u246D" => "14"
-
-# ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN]
-"\u24EE" => "14"
-
-# ⒕ [NUMBER FOURTEEN FULL STOP]
-"\u2495" => "14."
-
-# ⒁ [PARENTHESIZED NUMBER FOURTEEN]
-"\u2481" => "(14)"
-
-# ⑮ [CIRCLED NUMBER FIFTEEN]
-"\u246E" => "15"
-
-# ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN]
-"\u24EF" => "15"
-
-# ⒖ [NUMBER FIFTEEN FULL STOP]
-"\u2496" => "15."
-
-# ⒂ [PARENTHESIZED NUMBER FIFTEEN]
-"\u2482" => "(15)"
-
-# ⑯ [CIRCLED NUMBER SIXTEEN]
-"\u246F" => "16"
-
-# ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN]
-"\u24F0" => "16"
-
-# ⒗ [NUMBER SIXTEEN FULL STOP]
-"\u2497" => "16."
-
-# ⒃ [PARENTHESIZED NUMBER SIXTEEN]
-"\u2483" => "(16)"
-
-# ⑰ [CIRCLED NUMBER SEVENTEEN]
-"\u2470" => "17"
-
-# ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN]
-"\u24F1" => "17"
-
-# ⒘ [NUMBER SEVENTEEN FULL STOP]
-"\u2498" => "17."
-
-# ⒄ [PARENTHESIZED NUMBER SEVENTEEN]
-"\u2484" => "(17)"
-
-# ⑱ [CIRCLED NUMBER EIGHTEEN]
-"\u2471" => "18"
-
-# ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN]
-"\u24F2" => "18"
-
-# ⒙ [NUMBER EIGHTEEN FULL STOP]
-"\u2499" => "18."
-
-# ⒅ [PARENTHESIZED NUMBER EIGHTEEN]
-"\u2485" => "(18)"
-
-# ⑲ [CIRCLED NUMBER NINETEEN]
-"\u2472" => "19"
-
-# ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN]
-"\u24F3" => "19"
-
-# ⒚ [NUMBER NINETEEN FULL STOP]
-"\u249A" => "19."
-
-# ⒆ [PARENTHESIZED NUMBER NINETEEN]
-"\u2486" => "(19)"
-
-# ⑳ [CIRCLED NUMBER TWENTY]
-"\u2473" => "20"
-
-# ⓴ [NEGATIVE CIRCLED NUMBER TWENTY]
-"\u24F4" => "20"
-
-# ⒛ [NUMBER TWENTY FULL STOP]
-"\u249B" => "20."
-
-# ⒇ [PARENTHESIZED NUMBER TWENTY]
-"\u2487" => "(20)"
-
-# « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
-"\u00AB" => "\""
-
-# » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
-"\u00BB" => "\""
-
-# “ [LEFT DOUBLE QUOTATION MARK]
-"\u201C" => "\""
-
-# ” [RIGHT DOUBLE QUOTATION MARK]
-"\u201D" => "\""
-
-# „ [DOUBLE LOW-9 QUOTATION MARK]
-"\u201E" => "\""
-
-# ″ [DOUBLE PRIME]
-"\u2033" => "\""
-
-# ‶ [REVERSED DOUBLE PRIME]
-"\u2036" => "\""
-
-# ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
-"\u275D" => "\""
-
-# ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
-"\u275E" => "\""
-
-# ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
-"\u276E" => "\""
-
-# ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
-"\u276F" => "\""
-
-# " [FULLWIDTH QUOTATION MARK]
-"\uFF02" => "\""
-
-# ‘ [LEFT SINGLE QUOTATION MARK]
-"\u2018" => "\'"
-
-# ’ [RIGHT SINGLE QUOTATION MARK]
-"\u2019" => "\'"
-
-# ‚ [SINGLE LOW-9 QUOTATION MARK]
-"\u201A" => "\'"
-
-# ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
-"\u201B" => "\'"
-
-# ′ [PRIME]
-"\u2032" => "\'"
-
-# ‵ [REVERSED PRIME]
-"\u2035" => "\'"
-
-# ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
-"\u2039" => "\'"
-
-# › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
-"\u203A" => "\'"
-
-# ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
-"\u275B" => "\'"
-
-# ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
-"\u275C" => "\'"
-
-# ' [FULLWIDTH APOSTROPHE]
-"\uFF07" => "\'"
-
-# ‐ [HYPHEN]
-"\u2010" => "-"
-
-# ‑ [NON-BREAKING HYPHEN]
-"\u2011" => "-"
-
-# ‒ [FIGURE DASH]
-"\u2012" => "-"
-
-# – [EN DASH]
-"\u2013" => "-"
-
-# — [EM DASH]
-"\u2014" => "-"
-
-# ⁻ [SUPERSCRIPT MINUS]
-"\u207B" => "-"
-
-# ₋ [SUBSCRIPT MINUS]
-"\u208B" => "-"
-
-# - [FULLWIDTH HYPHEN-MINUS]
-"\uFF0D" => "-"
-
-# ⁅ [LEFT SQUARE BRACKET WITH QUILL]
-"\u2045" => "["
-
-# ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
-"\u2772" => "["
-
-# [ [FULLWIDTH LEFT SQUARE BRACKET]
-"\uFF3B" => "["
-
-# ⁆ [RIGHT SQUARE BRACKET WITH QUILL]
-"\u2046" => "]"
-
-# ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
-"\u2773" => "]"
-
-# ] [FULLWIDTH RIGHT SQUARE BRACKET]
-"\uFF3D" => "]"
-
-# ⁽ [SUPERSCRIPT LEFT PARENTHESIS]
-"\u207D" => "("
-
-# ₍ [SUBSCRIPT LEFT PARENTHESIS]
-"\u208D" => "("
-
-# ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT]
-"\u2768" => "("
-
-# ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
-"\u276A" => "("
-
-# ( [FULLWIDTH LEFT PARENTHESIS]
-"\uFF08" => "("
-
-# ⸨ [LEFT DOUBLE PARENTHESIS]
-"\u2E28" => "(("
-
-# ⁾ [SUPERSCRIPT RIGHT PARENTHESIS]
-"\u207E" => ")"
-
-# ₎ [SUBSCRIPT RIGHT PARENTHESIS]
-"\u208E" => ")"
-
-# ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT]
-"\u2769" => ")"
-
-# ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
-"\u276B" => ")"
-
-# ) [FULLWIDTH RIGHT PARENTHESIS]
-"\uFF09" => ")"
-
-# ⸩ [RIGHT DOUBLE PARENTHESIS]
-"\u2E29" => "))"
-
-# ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
-"\u276C" => "<"
-
-# ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
-"\u2770" => "<"
-
-# < [FULLWIDTH LESS-THAN SIGN]
-"\uFF1C" => "<"
-
-# ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
-"\u276D" => ">"
-
-# ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
-"\u2771" => ">"
-
-# > [FULLWIDTH GREATER-THAN SIGN]
-"\uFF1E" => ">"
-
-# ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT]
-"\u2774" => "{"
-
-# { [FULLWIDTH LEFT CURLY BRACKET]
-"\uFF5B" => "{"
-
-# ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
-"\u2775" => "}"
-
-# } [FULLWIDTH RIGHT CURLY BRACKET]
-"\uFF5D" => "}"
-
-# ⁺ [SUPERSCRIPT PLUS SIGN]
-"\u207A" => "+"
-
-# ₊ [SUBSCRIPT PLUS SIGN]
-"\u208A" => "+"
-
-# + [FULLWIDTH PLUS SIGN]
-"\uFF0B" => "+"
-
-# ⁼ [SUPERSCRIPT EQUALS SIGN]
-"\u207C" => "="
-
-# ₌ [SUBSCRIPT EQUALS SIGN]
-"\u208C" => "="
-
-# = [FULLWIDTH EQUALS SIGN]
-"\uFF1D" => "="
-
-# ! [FULLWIDTH EXCLAMATION MARK]
-"\uFF01" => "!"
-
-# ‼ [DOUBLE EXCLAMATION MARK]
-"\u203C" => "!!"
-
-# ⁉ [EXCLAMATION QUESTION MARK]
-"\u2049" => "!?"
-
-# # [FULLWIDTH NUMBER SIGN]
-"\uFF03" => "#"
-
-# $ [FULLWIDTH DOLLAR SIGN]
-"\uFF04" => "$"
-
-# ⁒ [COMMERCIAL MINUS SIGN]
-"\u2052" => "%"
-
-# % [FULLWIDTH PERCENT SIGN]
-"\uFF05" => "%"
-
-# & [FULLWIDTH AMPERSAND]
-"\uFF06" => "&"
-
-# ⁎ [LOW ASTERISK]
-"\u204E" => "*"
-
-# * [FULLWIDTH ASTERISK]
-"\uFF0A" => "*"
-
-# , [FULLWIDTH COMMA]
-"\uFF0C" => ","
-
-# . [FULLWIDTH FULL STOP]
-"\uFF0E" => "."
-
-# ⁄ [FRACTION SLASH]
-"\u2044" => "/"
-
-# / [FULLWIDTH SOLIDUS]
-"\uFF0F" => "/"
-
-# : [FULLWIDTH COLON]
-"\uFF1A" => ":"
-
-# ⁏ [REVERSED SEMICOLON]
-"\u204F" => ";"
-
-# ; [FULLWIDTH SEMICOLON]
-"\uFF1B" => ";"
-
-# ? [FULLWIDTH QUESTION MARK]
-"\uFF1F" => "?"
-
-# ⁇ [DOUBLE QUESTION MARK]
-"\u2047" => "??"
-
-# ⁈ [QUESTION EXCLAMATION MARK]
-"\u2048" => "?!"
-
-# @ [FULLWIDTH COMMERCIAL AT]
-"\uFF20" => "@"
-
-# \ [FULLWIDTH REVERSE SOLIDUS]
-"\uFF3C" => "\\"
-
-# ‸ [CARET]
-"\u2038" => "^"
-
-# ^ [FULLWIDTH CIRCUMFLEX ACCENT]
-"\uFF3E" => "^"
-
-# _ [FULLWIDTH LOW LINE]
-"\uFF3F" => "_"
-
-# ⁓ [SWUNG DASH]
-"\u2053" => "~"
-
-# ~ [FULLWIDTH TILDE]
-"\uFF5E" => "~"
-
-################################################################
-# Below is the Perl script used to generate the above mappings #
-# from ASCIIFoldingFilter.java: #
-################################################################
-#
-# #!/usr/bin/perl
-#
-# use warnings;
-# use strict;
-#
-# my @source_chars = ();
-# my @source_char_descriptions = ();
-# my $target = '';
-#
-# while (<>) {
-# if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) {
-# push @source_chars, $1;
-# push @source_char_descriptions, $2;
-# next;
-# }
-# if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) {
-# $target .= $1;
-# next;
-# }
-# if (/break;/) {
-# $target = "\\\"" if ($target eq '"');
-# for my $source_char_num (0..$#source_chars) {
-# print "# $source_char_descriptions[$source_char_num]\n";
-# print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n";
-# }
-# @source_chars = ();
-# @source_char_descriptions = ();
-# $target = '';
-# }
-# }
+++ /dev/null
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Syntax:
-# "source" => "target"
-# "source".length() > 0 (source cannot be empty.)
-# "target".length() >= 0 (target can be empty.)
-
-# example:
-# "À" => "A"
-# "\u00C0" => "A"
-# "\u00C0" => "\u0041"
-# "ß" => "ss"
-# "\t" => " "
-# "\n" => ""
-
-# À => A
-"\u00C0" => "A"
-
-# Á => A
-"\u00C1" => "A"
-
-# Â => A
-"\u00C2" => "A"
-
-# Ã => A
-"\u00C3" => "A"
-
-# Ä => A
-"\u00C4" => "A"
-
-# Å => A
-"\u00C5" => "A"
-
-# Æ => AE
-"\u00C6" => "AE"
-
-# Ç => C
-"\u00C7" => "C"
-
-# È => E
-"\u00C8" => "E"
-
-# É => E
-"\u00C9" => "E"
-
-# Ê => E
-"\u00CA" => "E"
-
-# Ë => E
-"\u00CB" => "E"
-
-# Ì => I
-"\u00CC" => "I"
-
-# Í => I
-"\u00CD" => "I"
-
-# Î => I
-"\u00CE" => "I"
-
-# Ï => I
-"\u00CF" => "I"
-
-# IJ => IJ
-"\u0132" => "IJ"
-
-# Ð => D
-"\u00D0" => "D"
-
-# Ñ => N
-"\u00D1" => "N"
-
-# Ò => O
-"\u00D2" => "O"
-
-# Ó => O
-"\u00D3" => "O"
-
-# Ô => O
-"\u00D4" => "O"
-
-# Õ => O
-"\u00D5" => "O"
-
-# Ö => O
-"\u00D6" => "O"
-
-# Ø => O
-"\u00D8" => "O"
-
-# Œ => OE
-"\u0152" => "OE"
-
-# Þ
-"\u00DE" => "TH"
-
-# Ù => U
-"\u00D9" => "U"
-
-# Ú => U
-"\u00DA" => "U"
-
-# Û => U
-"\u00DB" => "U"
-
-# Ü => U
-"\u00DC" => "U"
-
-# Ý => Y
-"\u00DD" => "Y"
-
-# Ÿ => Y
-"\u0178" => "Y"
-
-# à => a
-"\u00E0" => "a"
-
-# á => a
-"\u00E1" => "a"
-
-# â => a
-"\u00E2" => "a"
-
-# ã => a
-"\u00E3" => "a"
-
-# ä => a
-"\u00E4" => "a"
-
-# å => a
-"\u00E5" => "a"
-
-# æ => ae
-"\u00E6" => "ae"
-
-# ç => c
-"\u00E7" => "c"
-
-# è => e
-"\u00E8" => "e"
-
-# é => e
-"\u00E9" => "e"
-
-# ê => e
-"\u00EA" => "e"
-
-# ë => e
-"\u00EB" => "e"
-
-# ì => i
-"\u00EC" => "i"
-
-# í => i
-"\u00ED" => "i"
-
-# î => i
-"\u00EE" => "i"
-
-# ï => i
-"\u00EF" => "i"
-
-# ij => ij
-"\u0133" => "ij"
-
-# ð => d
-"\u00F0" => "d"
-
-# ñ => n
-"\u00F1" => "n"
-
-# ò => o
-"\u00F2" => "o"
-
-# ó => o
-"\u00F3" => "o"
-
-# ô => o
-"\u00F4" => "o"
-
-# õ => o
-"\u00F5" => "o"
-
-# ö => o
-"\u00F6" => "o"
-
-# ø => o
-"\u00F8" => "o"
-
-# œ => oe
-"\u0153" => "oe"
-
-# ß => ss
-"\u00DF" => "ss"
-
-# þ => th
-"\u00FE" => "th"
-
-# ù => u
-"\u00F9" => "u"
-
-# ú => u
-"\u00FA" => "u"
-
-# û => u
-"\u00FB" => "u"
-
-# ü => u
-"\u00FC" => "u"
-
-# ý => y
-"\u00FD" => "y"
-
-# ÿ => y
-"\u00FF" => "y"
-
-# ff => ff
-"\uFB00" => "ff"
-
-# fi => fi
-"\uFB01" => "fi"
-
-# fl => fl
-"\uFB02" => "fl"
-
-# ffi => ffi
-"\uFB03" => "ffi"
-
-# ffl => ffl
-"\uFB04" => "ffl"
-
-# ſt => ft
-"\uFB05" => "ft"
-
-# st => st
-"\uFB06" => "st"
+++ /dev/null
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-# Use a protected word file to protect against the stemmer reducing two
-# unrelated words to the same base word.
-
-# Some non-words that normally won't be encountered,
-# just to test that they won't be stemmed.
-dontstems
-zwhacky
-
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!--
- This is the Solr schema file. This file should be named "schema.xml" and
- should be in the conf directory under the solr home
- (i.e. ./solr/conf/schema.xml by default)
- or located where the classloader for the Solr webapp can find it.
-
- This example schema is the recommended starting point for users.
- It should be kept correct and concise, usable out-of-the-box.
-
- For more information, on how to customize this file, please see
- http://wiki.apache.org/solr/SchemaXml
-
- PERFORMANCE NOTE: this schema includes many optional features and should not
- be used for benchmarking. To improve performance one could
- - set stored="false" for all fields possible (esp large fields) when you
- only need to search on the field but don't need to return the original
- value.
- - set indexed="false" if you don't need to search on the field, but only
- return the field as a result of searching on other indexed fields.
- - remove all unneeded copyField statements
- - for best index size and searching performance, set "index" to false
- for all general text fields, use copyField to copy them to the
- catchall "text" field, and use that for searching.
- - For maximum indexing performance, use the StreamingUpdateSolrServer
- java client.
- - Remember to run the JVM in server mode, and use a higher logging level
- that avoids logging every request
--->
-
-<schema name="Local Unified Index" version="1.2">
- <!-- attribute "name" is the name of this schema and is only used for display purposes.
- Applications should change this to reflect the nature of the search collection.
- version="1.2" is Solr's version number for the schema syntax and semantics. It should
- not normally be changed by applications.
- 1.0: multiValued attribute did not exist, all fields are multiValued by nature
- 1.1: multiValued attribute introduced, false by default
- 1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
- -->
-
- <types>
- <!-- field type definitions. The "name" attribute is
- just a label to be used by field definitions. The "class"
- attribute and any other attributes determine the real
- behavior of the fieldType.
- Class names starting with "solr" refer to java classes in the
- org.apache.solr.analysis package.
- -->
-
- <!-- The StrField type is not analyzed, but indexed/stored verbatim.
- - StrField and TextField support an optional compressThreshold which
- limits compression (if enabled in the derived fields) to values which
- exceed a certain size (in characters).
- -->
- <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
-
- <!-- boolean type: "true" or "false" -->
- <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
- <!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
- <fieldtype name="binary" class="solr.BinaryField"/>
-
- <!-- The optional sortMissingLast and sortMissingFirst attributes are
- currently supported on types that are sorted internally as strings.
- This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
- - If sortMissingLast="true", then a sort on this field will cause documents
- without the field to come after documents with the field,
- regardless of the requested sort order (asc or desc).
- - If sortMissingFirst="true", then a sort on this field will cause documents
- without the field to come before documents with the field,
- regardless of the requested sort order.
- - If sortMissingLast="false" and sortMissingFirst="false" (the default),
- then default lucene sorting will be used which places docs without the
- field first in an ascending sort and last in a descending sort.
- -->
-
- <!--
- Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
- -->
- <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
- <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
- <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
- <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
-
- <!--
- Numeric field types that index each value at various levels of precision
- to accelerate range queries when the number of values between the range
- endpoints is large. See the javadoc for NumericRangeQuery for internal
- implementation details.
-
- Smaller precisionStep values (specified in bits) will lead to more tokens
- indexed per value, slightly larger index size, and faster range queries.
- A precisionStep of 0 disables indexing at different precision levels.
- -->
- <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
- <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
- <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
- <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
-
- <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
- is a more restricted form of the canonical representation of dateTime
- http://www.w3.org/TR/xmlschema-2/#dateTime
- The trailing "Z" designates UTC time and is mandatory.
- Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
- All other components are mandatory.
-
- Expressions can also be used to denote calculations that should be
- performed relative to "NOW" to determine the value, ie...
-
- NOW/HOUR
- ... Round to the start of the current hour
- NOW-1DAY
- ... Exactly 1 day prior to now
- NOW/DAY+6MONTHS+3DAYS
- ... 6 months and 3 days in the future from the start of
- the current day
-
- Consult the DateField javadocs for more information.
-
- Note: For faster range queries, consider the tdate type
- -->
- <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
-
- <!-- A Trie based date field for faster date range queries and date faceting. -->
- <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
-
- <!-- Test of new facet type that would support case-insensitive facetting -->
- <fieldType name="facet" class="solr.TextField" sortMissingLast="true" omitNorms="true">
- <analyzer>
- <!-- KeywordTokenizer does no actual tokenizing, so the entire
- input string is preserved as a single token
- -->
- <tokenizer class="solr.KeywordTokenizerFactory"/>
- <!-- The LowerCase TokenFilter does what you expect, which can be
- when you want your sorting to be case insensitive
- -->
- <filter class="solr.LowerCaseFilterFactory" />
- <!-- The TrimFilter removes any leading or trailing whitespace -->
- <filter class="solr.TrimFilterFactory" />
- </analyzer>
- </fieldType>
-
- <!--
- Note:
- These should only be used for compatibility with existing indexes (created with older Solr versions)
- or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
-
- Plain numeric field types that store and index the text
- value verbatim (and hence don't support range queries, since the
- lexicographic ordering isn't equal to the numeric ordering)
- -->
- <fieldType name="pint" class="solr.IntField" omitNorms="true"/>
- <fieldType name="plong" class="solr.LongField" omitNorms="true"/>
- <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
- <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
- <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
-
-
- <!--
- Note:
- These should only be used for compatibility with existing indexes (created with older Solr versions)
- or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
-
- Numeric field types that manipulate the value into
- a string value that isn't human-readable in its internal form,
- but with a lexicographic ordering the same as the numeric ordering,
- so that range queries work correctly.
- -->
- <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
- <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
- <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
- <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
-
-
- <!-- The "RandomSortField" is not used to store or search any
- data. You can declare fields of this type it in your schema
- to generate pseudo-random orderings of your docs for sorting
- purposes. The ordering is generated based on the field name
- and the version of the index, As long as the index version
- remains unchanged, and the same field name is reused,
- the ordering of the docs will be consistent.
- If you want different psuedo-random orderings of documents,
- for the same version of the index, use a dynamicField and
- change the name
- -->
- <fieldType name="random" class="solr.RandomSortField" indexed="true" />
-
- <!-- solr.TextField allows the specification of custom text analyzers
- specified as a tokenizer and a list of token filters. Different
- analyzers may be specified for indexing and querying.
-
- The optional positionIncrementGap puts space between multiple fields of
- this type on the same document, with the purpose of preventing false phrase
- matching across fields.
-
- For more info on customizing your analyzer chain, please see
- http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
- -->
-
- <!-- One can also specify an existing Analyzer class that has a
- default constructor via the class attribute on the analyzer element
- <fieldType name="text_greek" class="solr.TextField">
- <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
- </fieldType>
- -->
-
- <!-- A text field that only splits on whitespace for exact matching of words -->
- <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
- words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
- so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
- Synonyms and stopwords are customized by external files, and stemming is enabled.
- -->
- <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
- <analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <!-- in this example, we will only use synonyms at query time
- <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
- -->
- <!-- Case insensitive stop word removal.
- add enablePositionIncrements=true in both the index and query
- analyzers to leave a 'gap' for more accurate phrase queries.
- -->
- <filter class="solr.StopFilterFactory"
- ignoreCase="true"
- words="stopwords.txt"
- enablePositionIncrements="true"
- />
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
- </analyzer>
- <analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
- <filter class="solr.StopFilterFactory"
- ignoreCase="true"
- words="stopwords.txt"
- enablePositionIncrements="true"
- />
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
- </analyzer>
- </fieldType>
-
-
- <!-- Less flexible matching, but less false matches. Probably not ideal for product names,
- but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
- <fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
- <analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
- <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
- possible with WordDelimiterFilter in conjuncton with stemming. -->
- <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
- </analyzer>
- </fieldType>
-
-
- <!-- A general unstemmed text field - good if one does not know the language of the field -->
- <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
- <analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- </analyzer>
- <analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
- <filter class="solr.StopFilterFactory"
- ignoreCase="true"
- words="stopwords.txt"
- enablePositionIncrements="true"
- />
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- </analyzer>
- </fieldType>
-
-
- <!-- A general unstemmed text field that indexes tokens normally and also
- reversed (via ReversedWildcardFilterFactory), to enable more efficient
- leading wildcard queries. -->
- <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
- <analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
- maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
- </analyzer>
- <analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
- <filter class="solr.StopFilterFactory"
- ignoreCase="true"
- words="stopwords.txt"
- enablePositionIncrements="true"
- />
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- charFilter + WhitespaceTokenizer -->
- <!--
- <fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
- <analyzer>
- <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- </analyzer>
- </fieldType>
- -->
-
- <!-- This is an example of using the KeywordTokenizer along
- With various TokenFilterFactories to produce a sortable field
- that does not include some properties of the source text
- -->
- <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
- <analyzer>
- <!-- KeywordTokenizer does no actual tokenizing, so the entire
- input string is preserved as a single token
- -->
- <tokenizer class="solr.KeywordTokenizerFactory"/>
- <!-- The LowerCase TokenFilter does what you expect, which can be
- when you want your sorting to be case insensitive
- -->
- <filter class="solr.LowerCaseFilterFactory" />
- <!-- The TrimFilter removes any leading or trailing whitespace -->
- <filter class="solr.TrimFilterFactory" />
- <!-- The PatternReplaceFilter gives you the flexibility to use
- Java Regular expression to replace any sequence of characters
- matching a pattern with an arbitrary replacement string,
- which may include back references to portions of the original
- string matched by the pattern.
-
- See the Java Regular Expression documentation for more
- information on pattern and replacement string syntax.
-
- http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
- -->
- <filter class="solr.PatternReplaceFilterFactory"
- pattern="([^a-z])" replacement="" replace="all"
- />
- </analyzer>
- </fieldType>
-
- <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
- <analyzer>
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
- </analyzer>
- </fieldtype>
-
- <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
- <analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <!--
- The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
- a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
- Attributes of the DelimitedPayloadTokenFilterFactory :
- "delimiter" - a one character delimiter. Default is | (pipe)
- "encoder" - how to encode the following value into a playload
- float -> org.apache.lucene.analysis.payloads.FloatEncoder,
- integer -> o.a.l.a.p.IntegerEncoder
- identity -> o.a.l.a.p.IdentityEncoder
- Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
- -->
- <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
- </analyzer>
- </fieldtype>
-
- <!-- lowercases the entire field value, keeping it as a single token. -->
- <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.KeywordTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory" />
- </analyzer>
- </fieldType>
-
-
- <!-- since fields of this type are by default not stored or indexed,
- any data added to them will be ignored outright. -->
- <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
-
- <!-- subject -->
- <fieldType name="subject" class="solr.StrField" positionIncrementGap="100">
-<!--
- <analyzer>
- <tokenizer class="solr.PatternTokenizerFactory" pattern=";" />
- <filter class="solr.StandardFilterFactory" />
- <filter class="solr.TrimFilterFactory" />
- </analyzer>
--->
- </fieldType>
-
- <!-- Author type -->
- <fieldType name="author" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.PatternTokenizerFactory" pattern=";" />
- <filter class="solr.StandardFilterFactory" />
- <filter class="solr.TrimFilterFactory" />
-<!--
- <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
--->
- </analyzer>
- </fieldType>
-
-
- </types>
-
-
- <fields>
- <!-- Valid attributes for fields:
- name: mandatory - the name for the field
- type: mandatory - the name of a previously defined type from the
- <types> section
- indexed: true if this field should be indexed (searchable or sortable)
- stored: true if this field should be retrievable
- compressed: [false] if this field should be stored using gzip compression
- (this will only apply if the field type is compressible; among
- the standard field types, only TextField and StrField are)
- multiValued: true if this field may contain multiple values per document
- omitNorms: (expert) set to true to omit the norms associated with
- this field (this disables length normalization and index-time
- boosting for the field, and saves some memory). Only full-text
- fields or fields that need an index-time boost need norms.
- termVectors: [false] set to true to store the term vector for a
- given field.
- When using MoreLikeThis, fields used for similarity should be
- stored for best performance.
- termPositions: Store position information with the term vector.
- This will increase storage costs.
- termOffsets: Store offset information with the term vector. This
- will increase storage costs.
- default: a value that should be used if no value is specified
- when adding a document.
- -->
-
-<!-- -->
- <field name="id" type="string" indexed="true" stored="true" required="true" />
- <field name="_version_" type="long" indexed="true" stored="true"/>
-
- <field name="transactionId" type="long" indexed="true" stored="false"/>
-
- <field name="author" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
- <field name="author_exact" type="string" indexed="true" stored="false" multiValued="true" omitNorms="true" docValues="true" />
- <field name="author-date" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
- <field name="author-title" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
-
- <field name="corporate-date" type="text" indexed="true" stored="true" omitNorms="true"/>
- <field name="corporate-location" type="text" indexed="true" stored="true" omitNorms="true"/>
- <field name="corporate-name" type="text" indexed="true" stored="true" omitNorms="true"/>
-
- <field name="callnumber" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
- <field name="citation" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
-
- <field name="date" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
- <field name="description" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
-
- <field name="edition" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
-
- <field name="electronic-format-instruction" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
- <field name="electronic-format-type" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
- <field name="electronic-note" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
- <field name="electronic-text" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
- <field name="electronic-url" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
-
- <field name="isbn" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
- <field name="issn" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
- <field name="lccn" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
-
- <field name="medium" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
- <field name="medium_exact" type="facet" indexed="true" stored="false" multiValued="true" omitNorms="true" />
-
- <field name="meeting-date" type="text" indexed="true" stored="true" omitNorms="true"/>
- <field name="meeting-location" type="text" indexed="true" stored="true" omitNorms="true"/>
- <field name="meeting-name" type="text" indexed="true" stored="true" omitNorms="true"/>
-
- <field name="series-title" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
-
- <field name="subject" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true" />
- <field name="subject_exact" type="string" indexed="true" stored="false" multiValued="true" omitNorms="true" docValues="true" />
- <field name="subject-long" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true" />
-
- <field name="system-control-nr" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
-
- <field name="tech-rep-nr" type="text" indexed="true" multiValued="true" stored="true"/>
-
- <field name="title" type="text" indexed="true" multiValued="true" stored="true"/>
- <field name="title-complete" type="text" indexed="true" multiValued="true" stored="true"/>
- <field name="title-dates" type="text" indexed="true" multiValued="true" stored="true"/>
- <field name="title-medium" type="text" indexed="true" multiValued="true" stored="true"/>
- <field name="title-number-section" type="text" indexed="true" multiValued="true" stored="true"/>
- <field name="title-remainder" type="text" indexed="true" multiValued="true" stored="true"/>
- <field name="title-responsibility" type="text" indexed="true" multiValued="true" stored="true"/>
- <field name="title-uniform" type="text" indexed="true" multiValued="true" stored="true"/>
- <field name="title-uniform-key" type="text" indexed="true" multiValued="true" stored="true"/>
- <field name="title-uniform-media" type="text" indexed="true" multiValued="true" stored="true"/>
- <field name="title-uniform-partname" type="text" indexed="true" multiValued="true" stored="true"/>
- <field name="title-uniform-parts" type="text" indexed="true" multiValued="true" stored="true"/>
-
- <field name="journal-title" type="text" indexed="true" multiValued="true" stored="true" />
- <field name="journal-title_exact" type="text" indexed="true" multiValued="true" stored="false"/>
-
- <field name="physical-accomp" type="text" indexed="true" stored="true" multiValued="true" />
- <field name="physical-dimensions" type="text" indexed="true" stored="true" multiValued="true" />
- <field name="physical-extent" type="text" indexed="true" stored="true" multiValued="true" />
- <field name="physical-format" type="text" indexed="true" stored="true" multiValued="true" />
- <field name="physical-specified" type="text" indexed="true" stored="true" multiValued="true" />
- <field name="physical-unitsize" type="text" indexed="true" stored="true" multiValued="true" />
- <field name="physical-unittype" type="text" indexed="true" stored="true" multiValued="true" />
-
- <field name="publication-date" type="text" indexed="true" stored="true" multiValued="true" />
- <field name="publication-name" type="text" indexed="true" stored="true" multiValued="true" />
- <field name="publication-place" type="text" indexed="true" stored="true" multiValued="true" />
-
-
- <!-- Common metadata fields, named specifically to match up with
- SolrCell metadata when parsing rich documents such as Word, PDF.
- Some fields are multiValued only because Tika currently may return
- multiple values for them.
- -->
-<!--
- <field name="title" type="text" indexed="true" stored="true" multiValued="true"/>
- <field name="subject" type="text" indexed="true" stored="true"/>
- <field name="description" type="text" indexed="true" stored="true" multiValued="true"/>
- <field name="comments" type="text" indexed="true" stored="true"/>
- <field name="author" type="textgen" indexed="true" stored="true"/>
- <field name="keywords" type="textgen" indexed="true" stored="true"/>
- <field name="category" type="textgen" indexed="true" stored="true"/>
- <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
- <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
--->
- <field name="harvest-timestamp" type="date" indexed="true" stored="true"/>
- <field name="harvest-date" type="string" indexed="true" stored="true"/>
-
- <!-- catchall field, containing all other searchable text fields (implemented
- via copyField further on in this schema) -->
- <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
-
- <!-- catchall text field that indexes tokens both normally and in reverse for efficient
- leading wildcard queries. -->
- <field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
-
- <field name="payloads" type="payloads" indexed="true" stored="true"/>
-
- <!-- Uncommenting the following will create a "timestamp" field using
- a default value of "NOW" to indicate when each document was indexed.
- -->
- <!--
- <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
- -->
-
-
- <!-- Dynamic field definitions. If a field name is not found, dynamicFields
- will be used if the name matches any of the patterns.
- RESTRICTION: the glob-like pattern in the name attribute must have
- a "*" only at the start or the end.
- EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
- Longer patterns will be matched first. if equal size patterns
- both match, the first appearing in the schema will be used. -->
- <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
- <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
- <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
- <dynamicField name="*_t" type="text" indexed="true" stored="true"/>
- <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
- <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
- <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
- <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
-
- <!-- some trie-coded dynamic fields for faster range queries -->
- <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
- <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
- <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
- <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
- <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
-
- <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
-
- <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
- <dynamicField name="attr_*" type="textgen" indexed="true" stored="true" multiValued="true"/>
-
- <dynamicField name="random_*" type="random" />
-
- <!-- the following catch-all accepts any fields that don't already match an existing
- field name or dynamic field, rather than reporting them as an error.
- as configured, unknown fields are handled with type="text"; change this to
- type="ignored" if you want unknown fields to be discarded instead -->
- <dynamicField name="*" type="text" multiValued="true" />
-
- </fields>
-
- <!-- Field to use to determine and enforce document uniqueness.
- Unless this field is marked with required="false", it will be a required field
- -->
- <uniqueKey>id</uniqueKey>
-
- <!-- field for the QueryParser to use when an explicit fieldname is absent -->
- <defaultSearchField>text</defaultSearchField>
-
- <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
- <solrQueryParser defaultOperator="AND"/>
-
- <!-- copyField commands copy one field to another at the time a document
- is added to the index. It's used either to index the same field differently,
- or to add multiple fields to the same field for easier/faster searching. -->
-
- <copyField source="author" dest="author_exact"/>
- <copyField source="subject" dest="subject_exact"/>
- <copyField source="medium" dest="medium_exact"/>
- <copyField source="journal-title" dest="journal-title_exact"/>
-
- <!-- Above, multiple source fields are copied to the [text] field.
- Another way to map multiple source fields to the same
- destination field is to use the dynamic field syntax.
- copyField also supports a maxChars to copy setting. -->
-
- <copyField source="*" dest="text"/>
-
- <!-- copy name to alphaNameSort, a field designed for sorting by name -->
- <!-- <copyField source="name" dest="alphaNameSort"/> -->
-
-
- <!-- Similarity is the scoring routine for each document vs. a query.
- A custom similarity may be specified here, but the default is fine
- for most applications. -->
- <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
- <!-- ... OR ...
- Specify a SimilarityFactory class name implementation
- allowing parameters to be used.
- -->
- <!--
- <similarity class="com.example.solr.CustomSimilarityFactory">
- <str name="paramkey">param value</str>
- </similarity>
- -->
-</schema>
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-user=
-solr_hostname=localhost
-solr_port=8983
-rsyncd_port=18983
-data_dir=
-webapp_name=solr
-master_host=
-master_data_dir=
-master_status_dir=
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!--
- For more details about configurations options that may appear in
- this file, see http://wiki.apache.org/solr/SolrConfigXml.
--->
-<config>
- <!-- In all configuration below, a prefix of "solr." for class names
- is an alias that causes solr to search appropriate packages,
- including org.apache.solr.(search|update|request|core|analysis)
-
- You may also specify a fully qualified Java classname if you
- have your own custom plugins.
- -->
-
- <!-- Controls what version of Lucene various components of Solr
- adhere to. Generally, you want to use the latest version to
- get all bug fixes and improvements. It is highly recommended
- that you fully re-index after changing this setting as it can
- affect both how text is indexed and queried.
- -->
- <luceneMatchVersion>4.4</luceneMatchVersion>
-
- <!-- <lib/> directives can be used to instruct Solr to load any Jars
- identified and use them to resolve any "plugins" specified in
- your solrconfig.xml or schema.xml (ie: Analyzers, Request
- Handlers, etc...).
-
- All directories and paths are resolved relative to the
- instanceDir.
-
- Please note that <lib/> directives are processed in the order
- that they appear in your solrconfig.xml file, and are "stacked"
- on top of each other when building a ClassLoader - so if you have
- plugin jars with dependencies on other jars, the "lower level"
- dependency jars should be loaded first.
-
- If a "./lib" directory exists in your instanceDir, all files
- found in it are included as if you had used the following
- syntax...
-
- <lib dir="./lib" />
- -->
-
- <!-- A 'dir' option by itself adds any files found in the directory
- to the classpath, this is useful for including all jars in a
- directory.
-
- When a 'regex' is specified in addition to a 'dir', only the
- files in that directory which completely match the regex
- (anchored on both ends) will be included.
-
- If a 'dir' option (with or without a regex) is used and nothing
- is found that matches, a warning will be logged.
-
- The examples below can be used to load some solr-contribs along
- with their external dependencies.
- -->
-<!--
- <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
-
- <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
-
- <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
-
- <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
--->
- <!-- an exact 'path' can be used instead of a 'dir' to specify a
- specific jar file. This will cause a serious error to be logged
- if it can't be loaded.
- -->
- <!--
- <lib path="../a-jar-that-does-not-exist.jar" />
- -->
-
- <!-- Data Directory
-
- Used to specify an alternate directory to hold all index data
- other than the default ./data under the Solr home. If
- replication is in use, this should match the replication
- configuration.
- -->
- <dataDir>${solr.data.dir:}</dataDir>
-
-
- <!-- The DirectoryFactory to use for indexes.
-
- solr.StandardDirectoryFactory is filesystem
- based and tries to pick the best implementation for the current
- JVM and platform. solr.NRTCachingDirectoryFactory, the default,
- wraps solr.StandardDirectoryFactory and caches small files in memory
- for better NRT performance.
-
- One can force a particular implementation via solr.MMapDirectoryFactory,
- solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
-
- solr.RAMDirectoryFactory is memory based, not
- persistent, and doesn't work with replication.
- -->
- <directoryFactory name="DirectoryFactory"
- class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
-
- <!-- The CodecFactory for defining the format of the inverted index.
- The default implementation is SchemaCodecFactory, which is the official Lucene
- index format, but hooks into the schema to provide per-field customization of
- the postings lists and per-document values in the fieldType element
- (postingsFormat/docValuesFormat). Note that most of the alternative implementations
- are experimental, so if you choose to customize the index format, it's a good
- idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
- before upgrading to a newer version to avoid unnecessary reindexing.
- -->
- <codecFactory class="solr.SchemaCodecFactory"/>
-
- <!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>:
-
- <schemaFactory class="ManagedIndexSchemaFactory">
- <bool name="mutable">true</bool>
- <str name="managedSchemaResourceName">managed-schema</str>
- </schemaFactory>
-
- When ManagedIndexSchemaFactory is specified, Solr will load the schema from
- the resource named in 'managedSchemaResourceName', rather than from schema.xml.
- Note that the managed schema resource CANNOT be named schema.xml. If the managed
- schema does not exist, Solr will create it after reading schema.xml, then rename
- 'schema.xml' to 'schema.xml.bak'.
-
- Do NOT hand edit the managed schema - external modifications will be ignored and
- overwritten as a result of schema modification REST API calls.
-
- When ManagedIndexSchemaFactory is specified with mutable = true, schema
- modification REST API calls will be allowed; otherwise, error responses will be
- sent back for these requests.
- -->
- <schemaFactory class="ClassicIndexSchemaFactory"/>
-
- <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Index Config - These settings control low-level behavior of indexing
- Most example settings here show the default value, but are commented
- out, to more easily see where customizations have been made.
-
- Note: This replaces <indexDefaults> and <mainIndex> from older versions
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
- <indexConfig>
- <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a
- LimitTokenCountFilterFactory in your fieldType definition. E.g.
- <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
- -->
- <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
- <!-- <writeLockTimeout>1000</writeLockTimeout> -->
-
- <!-- The maximum number of simultaneous threads that may be
- indexing documents at once in IndexWriter; if more than this
- many threads arrive they will wait for others to finish.
- Default in Solr/Lucene is 8. -->
- <!-- <maxIndexingThreads>8</maxIndexingThreads> -->
-
- <!-- Expert: Enabling compound file will use fewer files for the index,
- using fewer file descriptors at the expense of decreased performance.
- Default in Lucene is "true". Default in Solr is "false" (since 3.6) -->
- <!-- <useCompoundFile>false</useCompoundFile> -->
-
- <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
- indexing for buffering added documents and deletions before they are
- flushed to the Directory.
- maxBufferedDocs sets a limit on the number of documents buffered
- before flushing.
- If both ramBufferSizeMB and maxBufferedDocs are set, then
- Lucene will flush based on whichever limit is hit first.
- The default is 100 MB. -->
- <!-- <ramBufferSizeMB>100</ramBufferSizeMB> -->
- <!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
-
- <!-- Expert: Merge Policy
- The Merge Policy in Lucene controls how merging of segments is done.
- The default since Solr/Lucene 3.3 is TieredMergePolicy.
- The default since Lucene 2.3 was the LogByteSizeMergePolicy,
- Even older versions of Lucene used LogDocMergePolicy.
- -->
- <!--
- <mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
- <int name="maxMergeAtOnce">10</int>
- <int name="segmentsPerTier">10</int>
- </mergePolicy>
- -->
-
- <!-- Merge Factor
- The merge factor controls how many segments will get merged at a time.
- For TieredMergePolicy, mergeFactor is a convenience parameter which
- will set both MaxMergeAtOnce and SegmentsPerTier at once.
- For LogByteSizeMergePolicy, mergeFactor decides how many new segments
- will be allowed before they are merged into one.
- Default is 10 for both merge policies.
- -->
- <!--
- <mergeFactor>10</mergeFactor>
- -->
-
- <!-- Expert: Merge Scheduler
- The Merge Scheduler in Lucene controls how merges are
- performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
- can perform merges in the background using separate threads.
- The SerialMergeScheduler (Lucene 2.2 default) does not.
- -->
- <!--
- <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
- -->
-
- <!-- LockFactory
-
- This option specifies which Lucene LockFactory implementation
- to use.
-
- single = SingleInstanceLockFactory - suggested for a
- read-only index or when there is no possibility of
- another process trying to modify the index.
- native = NativeFSLockFactory - uses OS native file locking.
- Do not use when multiple solr webapps in the same
- JVM are attempting to share a single index.
- simple = SimpleFSLockFactory - uses a plain file for locking
-
- Defaults: 'native' is default for Solr3.6 and later, otherwise
- 'simple' is the default
-
- More details on the nuances of each LockFactory...
- http://wiki.apache.org/lucene-java/AvailableLockFactories
- -->
- <lockType>${solr.lock.type:native}</lockType>
-
- <!-- Unlock On Startup
-
- If true, unlock any held write or commit locks on startup.
- This defeats the locking mechanism that allows multiple
- processes to safely access a lucene index, and should be used
- with care. Default is "false".
-
- This is not needed if lock type is 'single'
- -->
- <!--
- <unlockOnStartup>false</unlockOnStartup>
- -->
-
- <!-- Expert: Controls how often Lucene loads terms into memory
- Default is 128 and is likely good for most everyone.
- -->
- <!-- <termIndexInterval>128</termIndexInterval> -->
-
- <!-- If true, IndexReaders will be reopened (often more efficient)
- instead of closed and then opened. Default: true
- -->
- <!--
- <reopenReaders>true</reopenReaders>
- -->
-
- <!-- Commit Deletion Policy
- Custom deletion policies can be specified here. The class must
- implement org.apache.lucene.index.IndexDeletionPolicy.
-
- The default Solr IndexDeletionPolicy implementation supports
- deleting index commit points on number of commits, age of
- commit point and optimized status.
-
- The latest commit point should always be preserved regardless
- of the criteria.
- -->
- <!--
- <deletionPolicy class="solr.SolrDeletionPolicy">
- -->
- <!-- The number of commit points to be kept -->
- <!-- <str name="maxCommitsToKeep">1</str> -->
- <!-- The number of optimized commit points to be kept -->
- <!-- <str name="maxOptimizedCommitsToKeep">0</str> -->
- <!--
- Delete all commit points once they have reached the given age.
- Supports DateMathParser syntax e.g.
- -->
- <!--
- <str name="maxCommitAge">30MINUTES</str>
- <str name="maxCommitAge">1DAY</str>
- -->
- <!--
- </deletionPolicy>
- -->
-
- <!-- Lucene Infostream
-
- To aid in advanced debugging, Lucene provides an "InfoStream"
- of detailed information when indexing.
-
- Setting the value to true will instruct the underlying Lucene
- IndexWriter to write its info stream to solr's log. By default,
- this is enabled here, and controlled through log4j.properties.
- -->
- <infoStream>true</infoStream>
- </indexConfig>
-
-
- <!-- JMX
-
- This example enables JMX if and only if an existing MBeanServer
- is found, use this if you want to configure JMX through JVM
- parameters. Remove this to disable exposing Solr configuration
- and statistics to JMX.
-
- For more details see http://wiki.apache.org/solr/SolrJmx
- -->
- <jmx />
- <!-- If you want to connect to a particular server, specify the
- agentId
- -->
- <!-- <jmx agentId="myAgent" /> -->
- <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
- <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
- -->
-
- <!-- The default high-performance update handler -->
- <updateHandler class="solr.DirectUpdateHandler2">
-
- <!-- Enables a transaction log, used for real-time get, durability, and
- solr cloud replica recovery. The log can grow as big as
- uncommitted changes to the index, so use of a hard autoCommit
- is recommended (see below).
- "dir" - the target directory for transaction logs, defaults to the
- solr data directory. -->
- <updateLog>
- <str name="dir">${solr.ulog.dir:}</str>
- </updateLog>
-
- <!-- AutoCommit
-
- Perform a hard commit automatically under certain conditions.
- Instead of enabling autoCommit, consider using "commitWithin"
- when adding documents.
-
- http://wiki.apache.org/solr/UpdateXmlMessages
-
- maxDocs - Maximum number of documents to add since the last
- commit before automatically triggering a new commit.
-
- maxTime - Maximum amount of time in ms that is allowed to pass
- since a document was added before automatically
- triggering a new commit.
- openSearcher - if false, the commit causes recent index changes
- to be flushed to stable storage, but does not cause a new
- searcher to be opened to make those changes visible.
-
- If the updateLog is enabled, then it's highly recommended to
- have some sort of hard autoCommit to limit the log size.
- -->
- <autoCommit>
- <maxTime>${solr.autoCommit.maxTime:15000}</maxTime>
- <openSearcher>false</openSearcher>
- </autoCommit>
-
- <!-- softAutoCommit is like autoCommit except it causes a
- 'soft' commit which only ensures that changes are visible
- but does not ensure that data is synced to disk. This is
- faster and more near-realtime friendly than a hard commit.
- -->
-
- <autoSoftCommit>
- <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
- </autoSoftCommit>
-
- <!-- Update Related Event Listeners
-
- Various IndexWriter related events can trigger Listeners to
- take actions.
-
- postCommit - fired after every commit or optimize command
- postOptimize - fired after every optimize command
- -->
- <!-- The RunExecutableListener executes an external command from a
- hook such as postCommit or postOptimize.
-
- exe - the name of the executable to run
- dir - dir to use as the current working directory. (default=".")
- wait - the calling thread waits until the executable returns.
- (default="true")
- args - the arguments to pass to the program. (default is none)
- env - environment variables to set. (default is none)
- -->
- <!-- This example shows how RunExecutableListener could be used
- with the script based replication...
- http://wiki.apache.org/solr/CollectionDistribution
- -->
- <!--
- <listener event="postCommit" class="solr.RunExecutableListener">
- <str name="exe">solr/bin/snapshooter</str>
- <str name="dir">.</str>
- <bool name="wait">true</bool>
- <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
- <arr name="env"> <str>MYVAR=val1</str> </arr>
- </listener>
- -->
-
- </updateHandler>
-
- <!-- IndexReaderFactory
-
- Use the following format to specify a custom IndexReaderFactory,
- which allows for alternate IndexReader implementations.
-
- ** Experimental Feature **
-
- Please note - Using a custom IndexReaderFactory may prevent
- certain other features from working. The API to
- IndexReaderFactory may change without warning or may even be
- removed from future releases if the problems cannot be
- resolved.
-
-
- ** Features that may not work with custom IndexReaderFactory **
-
- The ReplicationHandler assumes a disk-resident index. Using a
- custom IndexReader implementation may cause incompatibility
- with ReplicationHandler and may cause replication to not work
- correctly. See SOLR-1366 for details.
-
- -->
- <!--
- <indexReaderFactory name="IndexReaderFactory" class="package.class">
- <str name="someArg">Some Value</str>
- </indexReaderFactory >
- -->
- <!-- By explicitly declaring the Factory, the termIndexDivisor can
- be specified.
- -->
- <!--
- <indexReaderFactory name="IndexReaderFactory"
- class="solr.StandardIndexReaderFactory">
- <int name="setTermIndexDivisor">12</int>
- </indexReaderFactory >
- -->
-
- <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Query section - these settings control query time things like caches
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
- <query>
- <!-- Max Boolean Clauses
-
- Maximum number of clauses in each BooleanQuery, an exception
- is thrown if exceeded.
-
- ** WARNING **
-
- This option actually modifies a global Lucene property that
- will affect all SolrCores. If multiple solrconfig.xml files
- disagree on this property, the value at any given moment will
- be based on the last SolrCore to be initialized.
-
- -->
- <maxBooleanClauses>1024</maxBooleanClauses>
-
-
- <!-- Solr Internal Query Caches
-
- There are two implementations of cache available for Solr,
- LRUCache, based on a synchronized LinkedHashMap, and
- FastLRUCache, based on a ConcurrentHashMap.
-
- FastLRUCache has faster gets and slower puts in single
- threaded operation and thus is generally faster than LRUCache
- when the hit ratio of the cache is high (> 75%), and may be
- faster under other scenarios on multi-cpu systems.
- -->
-
- <!-- Filter Cache
-
- Cache used by SolrIndexSearcher for filters (DocSets),
- unordered sets of *all* documents that match a query. When a
- new searcher is opened, its caches may be prepopulated or
- "autowarmed" using data from caches in the old searcher.
- autowarmCount is the number of items to prepopulate. For
- LRUCache, the autowarmed items will be the most recently
- accessed items.
-
- Parameters:
- class - the SolrCache implementation to use
- (LRUCache or FastLRUCache)
- size - the maximum number of entries in the cache
- initialSize - the initial capacity (number of entries) of
- the cache. (see java.util.HashMap)
- autowarmCount - the number of entries to prepopulate from
- an old cache.
- -->
- <filterCache class="solr.FastLRUCache"
- size="512"
- initialSize="512"
- autowarmCount="0"/>
-
- <!-- Query Result Cache
-
- Caches results of searches - ordered lists of document ids
- (DocList) based on a query, a sort, and the range of documents requested.
- -->
- <queryResultCache class="solr.LRUCache"
- size="512"
- initialSize="512"
- autowarmCount="0"/>
-
- <!-- Document Cache
-
- Caches Lucene Document objects (the stored fields for each
- document). Since Lucene internal document ids are transient,
- this cache will not be autowarmed.
- -->
- <documentCache class="solr.LRUCache"
- size="512"
- initialSize="512"
- autowarmCount="0"/>
-
- <!-- Field Value Cache
-
- Cache used to hold field values that are quickly accessible
- by document id. The fieldValueCache is created by default
- even if not configured here.
- -->
- <!--
- <fieldValueCache class="solr.FastLRUCache"
- size="512"
- autowarmCount="128"
- showItems="32" />
- -->
-
- <!-- Custom Cache
-
- Example of a generic cache. These caches may be accessed by
- name through SolrIndexSearcher.getCache(), cacheLookup(), and
- cacheInsert(). The purpose is to enable easy caching of
- user/application level data. The regenerator argument should
- be specified as an implementation of solr.CacheRegenerator
- if autowarming is desired.
- -->
- <!--
- <cache name="myUserCache"
- class="solr.LRUCache"
- size="4096"
- initialSize="1024"
- autowarmCount="1024"
- regenerator="com.mycompany.MyRegenerator"
- />
- -->
-
-
- <!-- Lazy Field Loading
-
- If true, stored fields that are not requested will be loaded
- lazily. This can result in a significant speed improvement
- if the usual case is to not load all stored fields,
- especially if the skipped fields are large compressed text
- fields.
- -->
- <enableLazyFieldLoading>true</enableLazyFieldLoading>
-
- <!-- Use Filter For Sorted Query
-
- A possible optimization that attempts to use a filter to
- satisfy a search. If the requested sort does not include
- score, then the filterCache will be checked for a filter
- matching the query. If found, the filter will be used as the
- source of document ids, and then the sort will be applied to
- that.
-
- For most situations, this will not be useful unless you
- frequently get the same search repeatedly with different sort
- options, and none of them ever use "score"
- -->
- <!--
- <useFilterForSortedQuery>true</useFilterForSortedQuery>
- -->
-
- <!-- Result Window Size
-
- An optimization for use with the queryResultCache. When a search
- is requested, a superset of the requested number of document ids
- are collected. For example, if a search for a particular query
- requests matching documents 10 through 19, and queryResultWindowSize is 50,
- then documents 0 through 49 will be collected and cached. Any further
- requests in that range can be satisfied via the cache.
- -->
- <queryResultWindowSize>20</queryResultWindowSize>
-
- <!-- Maximum number of documents to cache for any entry in the
- queryResultCache.
- -->
- <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
-
- <!-- Query Related Event Listeners
-
- Various IndexSearcher related events can trigger Listeners to
- take actions.
-
- newSearcher - fired whenever a new searcher is being prepared
- and there is a current searcher handling requests (aka
- registered). It can be used to prime certain caches to
- prevent long request times for certain requests.
-
- firstSearcher - fired whenever a new searcher is being
- prepared but there is no current registered searcher to handle
- requests or to gain autowarming data from.
-
-
- -->
- <!-- QuerySenderListener takes an array of NamedList and executes a
- local query request for each NamedList in sequence.
- -->
- <listener event="newSearcher" class="solr.QuerySenderListener">
- <arr name="queries">
- <lst>
- <str name="q">database:*</str>
- <str name="facet">true</str>
- <str name="facet.mincount">1</str>
- <str name="facet.field">author_exact</str>
- <str name="facet.field">subject_exact</str>
- <str name="facet.field">medium_exact</str>
- <str name="facet.field">date</str>
- <str name="facet.field">database</str>
- </lst>
- </arr>
- </listener>
- <listener event="firstSearcher" class="solr.QuerySenderListener">
- <arr name="queries">
- <lst>
- <str name="q">database:*</str>
- <str name="facet">true</str>
- <str name="facet.mincount">1</str>
- <str name="facet.field">author_exact</str>
- <str name="facet.field">subject_exact</str>
- <str name="facet.field">medium_exact</str>
- <str name="facet.field">date</str>
- <str name="facet.field">database</str>
- </lst>
- </arr>
- </listener>
-
- <!-- Use Cold Searcher
-
- If a search request comes in and there is no current
- registered searcher, then immediately register the still
- warming searcher and use it. If "false" then all requests
- will block until the first searcher is done warming.
- -->
- <useColdSearcher>false</useColdSearcher>
-
- <!-- Max Warming Searchers
-
- Maximum number of searchers that may be warming in the
- background concurrently. An error is returned if this limit
- is exceeded.
-
- Recommend values of 1-2 for read-only slaves, higher for
- masters w/o cache warming.
- -->
- <maxWarmingSearchers>2</maxWarmingSearchers>
-
- </query>
-
-
- <!-- Request Dispatcher
-
- This section contains instructions for how the SolrDispatchFilter
- should behave when processing requests for this SolrCore.
-
- handleSelect is a legacy option that affects the behavior of requests
- such as /select?qt=XXX
-
- handleSelect="true" will cause the SolrDispatchFilter to process
- the request and dispatch the query to a handler specified by the
- "qt" param, assuming "/select" isn't already registered.
-
- handleSelect="false" will cause the SolrDispatchFilter to
- ignore "/select" requests, resulting in a 404 unless a handler
- is explicitly registered with the name "/select"
-
- handleSelect="true" is not recommended for new users, but is the default
- for backwards compatibility
- -->
- <requestDispatcher handleSelect="false" >
- <!-- Request Parsing
-
- These settings indicate how Solr Requests may be parsed, and
- what restrictions may be placed on the ContentStreams from
- those requests
-
- enableRemoteStreaming - enables use of the stream.file
- and stream.url parameters for specifying remote streams.
-
- multipartUploadLimitInKB - specifies the max size (in KiB) of
- Multipart File Uploads that Solr will allow in a Request.
-
- formdataUploadLimitInKB - specifies the max size (in KiB) of
- form data (application/x-www-form-urlencoded) sent via
- POST. You can use POST to pass request parameters not
- fitting into the URL.
-
- addHttpRequestToContext - if set to true, it will instruct
- the requestParsers to include the original HttpServletRequest
- object in the context map of the SolrQueryRequest under the
- key "httpRequest". It will not be used by any of the existing
- Solr components, but may be useful when developing custom
- plugins.
-
- *** WARNING ***
- The settings below authorize Solr to fetch remote files. You
- should make sure your system has some authentication before
- using enableRemoteStreaming="true"
-
- -->
- <requestParsers enableRemoteStreaming="true"
- multipartUploadLimitInKB="2048000"
- formdataUploadLimitInKB="2048"
- addHttpRequestToContext="false"/>
-
- <!-- HTTP Caching
-
- Set HTTP caching related parameters (for proxy caches and clients).
-
- The options below instruct Solr not to output any HTTP Caching
- related headers
- -->
- <httpCaching never304="true" />
- <!-- If you include a <cacheControl> directive, it will be used to
- generate a Cache-Control header (as well as an Expires header
- if the value contains "max-age=")
-
- By default, no Cache-Control header is generated.
-
- You can use the <cacheControl> option even if you have set
- never304="true"
- -->
- <!--
- <httpCaching never304="true" >
- <cacheControl>max-age=30, public</cacheControl>
- </httpCaching>
- -->
- <!-- To enable Solr to respond with automatically generated HTTP
- Caching headers, and to respond to Cache Validation requests
- correctly, set the value of never304="false"
-
- This will cause Solr to generate Last-Modified and ETag
- headers based on the properties of the Index.
-
- The following options can also be specified to affect the
- values of these headers...
-
- lastModFrom - the default value is "openTime" which means the
- Last-Modified value (and validation against If-Modified-Since
- requests) will all be relative to when the current Searcher
- was opened. You can change it to lastModFrom="dirLastMod" if
- you want the value to exactly correspond to when the physical
- index was last modified.
-
- etagSeed="..." is an option you can change to force the ETag
- header (and validation against If-None-Match requests) to be
- different even if the index has not changed (ie: when making
- significant changes to your config file)
-
- (lastModFrom and etagSeed are both ignored if you use
- the never304="true" option)
- -->
- <!--
- <httpCaching lastModFrom="openTime"
- etagSeed="Solr">
- <cacheControl>max-age=30, public</cacheControl>
- </httpCaching>
- -->
- </requestDispatcher>
-
- <!-- Request Handlers
-
- http://wiki.apache.org/solr/SolrRequestHandler
-
- Incoming queries will be dispatched to a specific handler by name
- based on the path specified in the request.
-
- Legacy behavior: If the request path uses "/select" but no Request
- Handler has that name, and if handleSelect="true" has been specified in
- the requestDispatcher, then the Request Handler is dispatched based on
- the qt parameter. Handlers without a leading '/' are accessed this way
- like so: http://host/app/[core/]select?qt=name If no qt is
- given, then the requestHandler that declares default="true" will be
- used or the one named "standard".
-
- If a Request Handler is declared with startup="lazy", then it will
- not be initialized until the first request that uses it.
-
- -->
- <!-- SearchHandler
-
- http://wiki.apache.org/solr/SearchHandler
-
- For processing Search Queries, the primary Request Handler
- provided with Solr is "SearchHandler". It delegates to a sequence
- of SearchComponents (see below) and supports distributed
- queries across multiple shards
- -->
- <requestHandler name="/select" class="solr.SearchHandler">
- <!-- default values for query parameters can be specified, these
- will be overridden by parameters in the request
- -->
- <lst name="defaults">
- <str name="echoParams">explicit</str>
- <int name="rows">10</int>
- <str name="df">text</str>
- <str name="fl">*,score</str>
- </lst>
- <!-- In addition to defaults, "appends" params can be specified
- to identify values which should be appended to the list of
- multi-val params from the query (or the existing "defaults").
- -->
- <!-- In this example, the param "fq=inStock:true" would be appended to
- any query time fq params the user may specify, as a mechanism for
- partitioning the index, independent of any user selected filtering
- that may also be desired (perhaps as a result of faceted searching).
-
- NOTE: there is *absolutely* nothing a client can do to prevent these
- "appends" values from being used, so don't use this mechanism
- unless you are sure you always want it.
- -->
- <!--
- <lst name="appends">
- <str name="fq">inStock:true</str>
- </lst>
- -->
- <!-- "invariants" are a way of letting the Solr maintainer lock down
- the options available to Solr clients. Any params values
- specified here are used regardless of what values may be specified
- in either the query, the "defaults", or the "appends" params.
-
- In this example, the facet.field and facet.query params would
- be fixed, limiting the facets clients can use. Faceting is
- not turned on by default - but if the client does specify
- facet=true in the request, these are the only facets they
- will be able to see counts for; regardless of what other
- facet.field or facet.query params they may specify.
-
- NOTE: there is *absolutely* nothing a client can do to prevent these
- "invariants" values from being used, so don't use this mechanism
- unless you are sure you always want it.
- -->
- <!--
- <lst name="invariants">
- <str name="facet.field">cat</str>
- <str name="facet.field">manu_exact</str>
- <str name="facet.query">price:[* TO 500]</str>
- <str name="facet.query">price:[500 TO *]</str>
- </lst>
- -->
- <!-- If the default list of SearchComponents is not desired, that
- list can either be overridden completely, or components can be
- prepended or appended to the default list. (see below)
- -->
- <!--
- <arr name="components">
- <str>nameOfCustomComponent1</str>
- <str>nameOfCustomComponent2</str>
- </arr>
- -->
- </requestHandler>
-
- <!-- A request handler that returns indented JSON by default -->
- <requestHandler name="/query" class="solr.SearchHandler">
- <lst name="defaults">
- <str name="echoParams">explicit</str>
- <str name="wt">json</str>
- <str name="indent">true</str>
- <str name="df">text</str>
- </lst>
- </requestHandler>
-
-
- <!-- realtime get handler, guaranteed to return the latest stored fields of
- any document, without the need to commit or open a new searcher. The
- current implementation relies on the updateLog feature being enabled. -->
- <requestHandler name="/get" class="solr.RealTimeGetHandler">
- <lst name="defaults">
- <str name="omitHeader">true</str>
- <str name="wt">json</str>
- <str name="indent">true</str>
- </lst>
- </requestHandler>
-
-
- <!-- A Robust Example
-
- This example SearchHandler declaration shows off usage of the
- SearchHandler with many defaults declared
-
- Note that the same Request Handler
- (SearchHandler) can be registered multiple times with different
- names (and different init parameters)
- -->
- <requestHandler name="/browse" class="solr.SearchHandler">
- <lst name="defaults">
- <str name="echoParams">explicit</str>
-
- <!-- VelocityResponseWriter settings -->
- <str name="wt">velocity</str>
- <str name="v.template">browse</str>
- <str name="v.layout">layout</str>
- <str name="title">Solritas</str>
-
- <!-- Query settings -->
- <str name="defType">edismax</str>
- <str name="qf">
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
- title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
- </str>
- <str name="df">text</str>
- <str name="mm">100%</str>
- <str name="q.alt">*:*</str>
- <str name="rows">10</str>
- <str name="fl">*,score</str>
-
- <str name="mlt.qf">
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
- title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
- </str>
- <str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str>
- <int name="mlt.count">3</int>
-
- <!-- Faceting defaults -->
- <str name="facet">on</str>
- <str name="facet.field">cat</str>
- <str name="facet.field">manu_exact</str>
- <str name="facet.field">content_type</str>
- <str name="facet.field">author_s</str>
- <str name="facet.query">ipod</str>
- <str name="facet.query">GB</str>
- <str name="facet.mincount">1</str>
- <str name="facet.pivot">cat,inStock</str>
- <str name="facet.range.other">after</str>
- <str name="facet.range">price</str>
- <int name="f.price.facet.range.start">0</int>
- <int name="f.price.facet.range.end">600</int>
- <int name="f.price.facet.range.gap">50</int>
- <str name="facet.range">popularity</str>
- <int name="f.popularity.facet.range.start">0</int>
- <int name="f.popularity.facet.range.end">10</int>
- <int name="f.popularity.facet.range.gap">3</int>
- <str name="facet.range">manufacturedate_dt</str>
- <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
- <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
- <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
- <str name="f.manufacturedate_dt.facet.range.other">before</str>
- <str name="f.manufacturedate_dt.facet.range.other">after</str>
-
- <!-- Highlighting defaults -->
- <str name="hl">on</str>
- <str name="hl.fl">content features title name</str>
- <str name="hl.encoder">html</str>
- <str name="hl.simple.pre">&lt;b&gt;</str>
- <str name="hl.simple.post">&lt;/b&gt;</str>
- <str name="f.title.hl.fragsize">0</str>
- <str name="f.title.hl.alternateField">title</str>
- <str name="f.name.hl.fragsize">0</str>
- <str name="f.name.hl.alternateField">name</str>
- <str name="f.content.hl.snippets">3</str>
- <str name="f.content.hl.fragsize">200</str>
- <str name="f.content.hl.alternateField">content</str>
- <str name="f.content.hl.maxAlternateFieldLength">750</str>
-
- <!-- Spell checking defaults -->
- <str name="spellcheck">on</str>
- <str name="spellcheck.extendedResults">false</str>
- <str name="spellcheck.count">5</str>
- <str name="spellcheck.alternativeTermCount">2</str>
- <str name="spellcheck.maxResultsForSuggest">5</str>
- <str name="spellcheck.collate">true</str>
- <str name="spellcheck.collateExtendedResults">true</str>
- <str name="spellcheck.maxCollationTries">5</str>
- <str name="spellcheck.maxCollations">3</str>
- </lst>
-
- <!-- append spellchecking to our list of components -->
- <arr name="last-components">
- <str>spellcheck</str>
- </arr>
- </requestHandler>
-
-
- <!-- Update Request Handler.
-
- http://wiki.apache.org/solr/UpdateXmlMessages
-
- The canonical Request Handler for Modifying the Index through
- commands specified using XML, JSON, CSV, or JAVABIN
-
- Note: Since solr1.1 requestHandlers require a valid content
- type header if posted in the body. For example, curl now
- requires: -H 'Content-type:text/xml; charset=utf-8'
-
- To override the request content type and force a specific
- Content-type, use the request parameter:
- ?update.contentType=text/csv
-
- This handler will pick a response format to match the input
- if the 'wt' parameter is not explicit
- -->
- <requestHandler name="/update" class="solr.UpdateRequestHandler">
- <!-- See below for information on defining
- updateRequestProcessorChains that can be used by name
- on each Update Request
- -->
- <!--
- <lst name="defaults">
- <str name="update.chain">dedupe</str>
- </lst>
- -->
- </requestHandler>
-
- <!-- for back compat with clients using /update/json and /update/csv -->
- <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler">
- <lst name="defaults">
- <str name="stream.contentType">application/json</str>
- </lst>
- </requestHandler>
- <requestHandler name="/update/csv" class="solr.CSVRequestHandler">
- <lst name="defaults">
- <str name="stream.contentType">application/csv</str>
- </lst>
- </requestHandler>
-
- <!-- Solr Cell Update Request Handler
-
- http://wiki.apache.org/solr/ExtractingRequestHandler
-
- -->
- <requestHandler name="/update/extract"
- startup="lazy"
- class="solr.extraction.ExtractingRequestHandler" >
- <lst name="defaults">
- <str name="lowernames">true</str>
- <str name="uprefix">ignored_</str>
-
- <!-- capture link hrefs but ignore div attributes -->
- <str name="captureAttr">true</str>
- <str name="fmap.a">links</str>
- <str name="fmap.div">ignored_</str>
- </lst>
- </requestHandler>
-
-
- <!-- Field Analysis Request Handler
-
- RequestHandler that provides much the same functionality as
- analysis.jsp. Provides the ability to specify multiple field
- types and field names in the same request and outputs
- index-time and query-time analysis for each of them.
-
- Request parameters are:
- analysis.fieldname - field name whose analyzers are to be used
-
- analysis.fieldtype - field type whose analyzers are to be used
- analysis.fieldvalue - text for index-time analysis
- q (or analysis.q) - text for query time analysis
- analysis.showmatch (true|false) - When set to true and when
- query analysis is performed, the produced tokens of the
- field value analysis will be marked as "matched" for every
- token that is produced by the query analysis
- -->
- <requestHandler name="/analysis/field"
- startup="lazy"
- class="solr.FieldAnalysisRequestHandler" />
-
-
- <!-- Document Analysis Handler
-
- http://wiki.apache.org/solr/AnalysisRequestHandler
-
- An analysis handler that provides a breakdown of the analysis
- process of provided documents. This handler expects a (single)
- content stream with the following format:
-
- <docs>
- <doc>
- <field name="id">1</field>
- <field name="name">The Name</field>
- <field name="text">The Text Value</field>
- </doc>
- <doc>...</doc>
- <doc>...</doc>
- ...
- </docs>
-
- Note: Each document must contain a field which serves as the
- unique key. This key is used in the returned response to associate
- an analysis breakdown to the analyzed document.
-
- Like the FieldAnalysisRequestHandler, this handler also supports
- query analysis by sending either an "analysis.query" or "q"
- request parameter that holds the query text to be analyzed. It
- also supports the "analysis.showmatch" parameter which when set to
- true, all field tokens that match the query tokens will be marked
- as a "match".
- -->
- <requestHandler name="/analysis/document"
- class="solr.DocumentAnalysisRequestHandler"
- startup="lazy" />
-
- <!-- Admin Handlers
-
- Admin Handlers - This will register all the standard admin
- RequestHandlers.
- -->
- <requestHandler name="/admin/"
- class="solr.admin.AdminHandlers" />
- <!-- This single handler is equivalent to the following... -->
- <!--
- <requestHandler name="/admin/luke" class="solr.admin.LukeRequestHandler" />
- <requestHandler name="/admin/system" class="solr.admin.SystemInfoHandler" />
- <requestHandler name="/admin/plugins" class="solr.admin.PluginInfoHandler" />
- <requestHandler name="/admin/threads" class="solr.admin.ThreadDumpHandler" />
- <requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" />
- <requestHandler name="/admin/file" class="solr.admin.ShowFileRequestHandler" />
- -->
- <!-- If you wish to hide files under ${solr.home}/conf, explicitly
- register the ShowFileRequestHandler using:
- -->
- <!--
- <requestHandler name="/admin/file"
- class="solr.admin.ShowFileRequestHandler" >
- <lst name="invariants">
- <str name="hidden">synonyms.txt</str>
- <str name="hidden">anotherfile.txt</str>
- </lst>
- </requestHandler>
- -->
-
- <!-- ping/healthcheck -->
- <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
- <lst name="invariants">
- <str name="q">solrpingquery</str>
- </lst>
- <lst name="defaults">
- <str name="echoParams">all</str>
- </lst>
- <!-- An optional feature of the PingRequestHandler is to configure the
- handler with a "healthcheckFile" which can be used to enable/disable
- the PingRequestHandler.
- relative paths are resolved against the data dir
- -->
- <!-- <str name="healthcheckFile">server-enabled.txt</str> -->
- </requestHandler>
-
- <!-- Echo the request contents back to the client -->
- <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
- <lst name="defaults">
- <str name="echoParams">explicit</str>
- <str name="echoHandler">true</str>
- </lst>
- </requestHandler>
-
- <!-- Solr Replication
-
- The SolrReplicationHandler supports replicating indexes from a
- "master" used for indexing and "slaves" used for queries.
-
- http://wiki.apache.org/solr/SolrReplication
-
- It is also necessary for SolrCloud to function (in Cloud mode, the
- replication handler is used to bulk transfer segments when nodes
- are added or need to recover).
-
- https://wiki.apache.org/solr/SolrCloud/
- -->
- <requestHandler name="/replication" class="solr.ReplicationHandler" >
- <!--
- To enable simple master/slave replication, uncomment one of the
- sections below, depending on whether this solr instance should be
- the "master" or a "slave". If this instance is a "slave" you will
- also need to fill in the masterUrl to point to a real machine.
- -->
- <!--
- <lst name="master">
- <str name="replicateAfter">commit</str>
- <str name="replicateAfter">startup</str>
- <str name="confFiles">schema.xml,stopwords.txt</str>
- </lst>
- -->
- <!--
- <lst name="slave">
- <str name="masterUrl">http://your-master-hostname:8983/solr</str>
- <str name="pollInterval">00:00:60</str>
- </lst>
- -->
- </requestHandler>
-
- <!-- Search Components
-
- Search components are registered to SolrCore and used by
- instances of SearchHandler (which can access them by name)
-
- By default, the following components are available:
-
- <searchComponent name="query" class="solr.QueryComponent" />
- <searchComponent name="facet" class="solr.FacetComponent" />
- <searchComponent name="mlt" class="solr.MoreLikeThisComponent" />
- <searchComponent name="highlight" class="solr.HighlightComponent" />
- <searchComponent name="stats" class="solr.StatsComponent" />
- <searchComponent name="debug" class="solr.DebugComponent" />
-
- Default configuration in a requestHandler would look like:
-
- <arr name="components">
- <str>query</str>
- <str>facet</str>
- <str>mlt</str>
- <str>highlight</str>
- <str>stats</str>
- <str>debug</str>
- </arr>
-
- If you register a searchComponent to one of the standard names,
- that will be used instead of the default.
-
- To insert components before or after the 'standard' components, use:
-
- <arr name="first-components">
- <str>myFirstComponentName</str>
- </arr>
-
- <arr name="last-components">
- <str>myLastComponentName</str>
- </arr>
-
- NOTE: The component registered with the name "debug" will
- always be executed after the "last-components"
-
- -->
-
- <!-- Spell Check
-
- The spell check component can return a list of alternative spelling
- suggestions.
-
- http://wiki.apache.org/solr/SpellCheckComponent
- -->
- <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
-
- <str name="queryAnalyzerFieldType">text_general</str>
-
- <!-- Multiple "Spell Checkers" can be declared and used by this
- component
- -->
-
- <!-- a spellchecker built from a field of the main index -->
- <lst name="spellchecker">
- <str name="name">default</str>
- <str name="field">text</str>
- <str name="classname">solr.DirectSolrSpellChecker</str>
- <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
- <str name="distanceMeasure">internal</str>
- <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
- <float name="accuracy">0.5</float>
- <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
- <int name="maxEdits">2</int>
- <!-- the minimum shared prefix when enumerating terms -->
- <int name="minPrefix">1</int>
- <!-- maximum number of inspections per result. -->
- <int name="maxInspections">5</int>
- <!-- minimum length of a query term to be considered for correction -->
- <int name="minQueryLength">4</int>
- <!-- maximum threshold of documents a query term can appear in to be considered for correction -->
- <float name="maxQueryFrequency">0.01</float>
- <!-- uncomment this to require suggestions to occur in 1% of the documents
- <float name="thresholdTokenFrequency">.01</float>
- -->
- </lst>
-
- <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
- <lst name="spellchecker">
- <str name="name">wordbreak</str>
- <str name="classname">solr.WordBreakSolrSpellChecker</str>
- <str name="field">name</str>
- <str name="combineWords">true</str>
- <str name="breakWords">true</str>
- <int name="maxChanges">10</int>
- </lst>
-
- <!-- a spellchecker that uses a different distance measure -->
- <!--
- <lst name="spellchecker">
- <str name="name">jarowinkler</str>
- <str name="field">spell</str>
- <str name="classname">solr.DirectSolrSpellChecker</str>
- <str name="distanceMeasure">
- org.apache.lucene.search.spell.JaroWinklerDistance
- </str>
- </lst>
- -->
-
- <!-- a spellchecker that uses an alternate comparator
-
- comparatorClass can be one of:
- 1. score (default)
- 2. freq (Frequency first, then score)
- 3. A fully qualified class name
- -->
- <!--
- <lst name="spellchecker">
- <str name="name">freq</str>
- <str name="field">lowerfilt</str>
- <str name="classname">solr.DirectSolrSpellChecker</str>
- <str name="comparatorClass">freq</str>
- -->
-
- <!-- A spellchecker that reads the list of words from a file -->
- <!--
- <lst name="spellchecker">
- <str name="classname">solr.FileBasedSpellChecker</str>
- <str name="name">file</str>
- <str name="sourceLocation">spellings.txt</str>
- <str name="characterEncoding">UTF-8</str>
- <str name="spellcheckIndexDir">spellcheckerFile</str>
- </lst>
- -->
- </searchComponent>
-
- <!-- A request handler for demonstrating the spellcheck component.
-
- NOTE: This is purely as an example. The whole purpose of the
- SpellCheckComponent is to hook it into the request handler that
- handles your normal user queries so that a separate request is
- not needed to get suggestions.
-
- IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS
- NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
-
- See http://wiki.apache.org/solr/SpellCheckComponent for details
- on the request parameters.
- -->
- <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
- <lst name="defaults">
- <str name="df">text</str>
- <!-- Solr will use suggestions from both the 'default' spellchecker
- and from the 'wordbreak' spellchecker and combine them.
- collations (re-written queries) can include a combination of
- corrections from both spellcheckers -->
- <str name="spellcheck.dictionary">default</str>
- <str name="spellcheck.dictionary">wordbreak</str>
- <str name="spellcheck">on</str>
- <str name="spellcheck.extendedResults">true</str>
- <str name="spellcheck.count">10</str>
- <str name="spellcheck.alternativeTermCount">5</str>
- <str name="spellcheck.maxResultsForSuggest">5</str>
- <str name="spellcheck.collate">true</str>
- <str name="spellcheck.collateExtendedResults">true</str>
- <str name="spellcheck.maxCollationTries">10</str>
- <str name="spellcheck.maxCollations">5</str>
- </lst>
- <arr name="last-components">
- <str>spellcheck</str>
- </arr>
- </requestHandler>
-
- <!-- Term Vector Component
-
- http://wiki.apache.org/solr/TermVectorComponent
- -->
- <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
-
- <!-- A request handler for demonstrating the term vector component
-
- This is purely as an example.
-
- In reality you will likely want to add the component to your
- already specified request handlers.
- -->
- <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
- <lst name="defaults">
- <str name="df">text</str>
- <bool name="tv">true</bool>
- </lst>
- <arr name="last-components">
- <str>tvComponent</str>
- </arr>
- </requestHandler>
-
- <!-- Clustering Component
-
- http://wiki.apache.org/solr/ClusteringComponent
-
- You'll need to set the solr.clustering.enabled system property
- when running solr to run with clustering enabled:
-
- java -Dsolr.clustering.enabled=true -jar start.jar
-
- -->
- <searchComponent name="clustering"
- enable="${solr.clustering.enabled:false}"
- class="solr.clustering.ClusteringComponent" >
- <!-- Declare an engine -->
- <lst name="engine">
- <!-- The name, only one can be named "default" -->
- <str name="name">default</str>
-
- <!-- Class name of Carrot2 clustering algorithm.
-
- Currently available algorithms are:
-
- * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
- * org.carrot2.clustering.stc.STCClusteringAlgorithm
- * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
-
- See http://project.carrot2.org/algorithms.html for the
- algorithm's characteristics.
- -->
- <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
-
- <!-- Overriding values for Carrot2 default algorithm attributes.
-
- For a description of all available attributes, see:
- http://download.carrot2.org/stable/manual/#chapter.components.
- Use attribute key as name attribute of str elements
- below. These can be further overridden for individual
- requests by specifying attribute key as request parameter
- name and attribute value as parameter value.
- -->
- <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
-
- <!-- Location of Carrot2 lexical resources.
-
- A directory from which to load Carrot2-specific stop words
- and stop labels. Absolute or relative to Solr config directory.
- If a specific resource (e.g. stopwords.en) is present in the
- specified dir, it will completely override the corresponding
- default one that ships with Carrot2.
-
- For an overview of Carrot2 lexical resources, see:
- http://download.carrot2.org/head/manual/#chapter.lexical-resources
- -->
- <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
-
- <!-- The language to assume for the documents.
-
- For a list of allowed values, see:
- http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
- -->
- <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
- </lst>
- <lst name="engine">
- <str name="name">stc</str>
- <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
- </lst>
- </searchComponent>
-
- <!-- A request handler for demonstrating the clustering component
-
- This is purely as an example.
-
- In reality you will likely want to add the component to your
- already specified request handlers.
- -->
- <requestHandler name="/clustering"
- startup="lazy"
- enable="${solr.clustering.enabled:false}"
- class="solr.SearchHandler">
- <lst name="defaults">
- <bool name="clustering">true</bool>
- <str name="clustering.engine">default</str>
- <bool name="clustering.results">true</bool>
- <!-- The title field -->
- <str name="carrot.title">name</str>
- <str name="carrot.url">id</str>
- <!-- The field to cluster on -->
- <str name="carrot.snippet">features</str>
- <!-- produce summaries -->
- <bool name="carrot.produceSummary">true</bool>
- <!-- the maximum number of labels per cluster -->
- <!--<int name="carrot.numDescriptions">5</int>-->
- <!-- produce sub clusters -->
- <bool name="carrot.outputSubClusters">false</bool>
-
- <str name="defType">edismax</str>
- <str name="qf">
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
- </str>
- <str name="q.alt">*:*</str>
- <str name="rows">10</str>
- <str name="fl">*,score</str>
- </lst>
- <arr name="last-components">
- <str>clustering</str>
- </arr>
- </requestHandler>
-
- <!-- Terms Component
-
- http://wiki.apache.org/solr/TermsComponent
-
- A component to return terms and document frequency of those
- terms
- -->
- <searchComponent name="terms" class="solr.TermsComponent"/>
-
- <!-- A request handler for demonstrating the terms component -->
- <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
- <lst name="defaults">
- <bool name="terms">true</bool>
- <bool name="distrib">false</bool>
- </lst>
- <arr name="components">
- <str>terms</str>
- </arr>
- </requestHandler>
-
-
- <!-- Query Elevation Component
-
- http://wiki.apache.org/solr/QueryElevationComponent
-
- a search component that enables you to configure the top
- results for a given query regardless of the normal lucene
- scoring.
- -->
- <searchComponent name="elevator" class="solr.QueryElevationComponent" >
- <!-- pick a fieldType to analyze queries -->
- <str name="queryFieldType">string</str>
- <str name="config-file">elevate.xml</str>
- </searchComponent>
-
- <!-- A request handler for demonstrating the elevator component -->
- <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
- <lst name="defaults">
- <str name="echoParams">explicit</str>
- <str name="df">text</str>
- </lst>
- <arr name="last-components">
- <str>elevator</str>
- </arr>
- </requestHandler>
-
- <!-- Highlighting Component
-
- http://wiki.apache.org/solr/HighlightingParameters
- -->
- <searchComponent class="solr.HighlightComponent" name="highlight">
- <highlighting>
- <!-- Configure the standard fragmenter -->
- <!-- This could most likely be commented out in the "default" case -->
- <fragmenter name="gap"
- default="true"
- class="solr.highlight.GapFragmenter">
- <lst name="defaults">
- <int name="hl.fragsize">100</int>
- </lst>
- </fragmenter>
-
- <!-- A regular-expression-based fragmenter
- (for sentence extraction)
- -->
- <fragmenter name="regex"
- class="solr.highlight.RegexFragmenter">
- <lst name="defaults">
- <!-- slightly smaller fragsizes work better because of slop -->
- <int name="hl.fragsize">70</int>
- <!-- allow 50% slop on fragment sizes -->
- <float name="hl.regex.slop">0.5</float>
- <!-- a basic sentence pattern -->
- <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
- </lst>
- </fragmenter>
-
- <!-- Configure the standard formatter -->
- <formatter name="html"
- default="true"
- class="solr.highlight.HtmlFormatter">
- <lst name="defaults">
- <str name="hl.simple.pre"><![CDATA[<em>]]></str>
- <str name="hl.simple.post"><![CDATA[</em>]]></str>
- </lst>
- </formatter>
-
- <!-- Configure the standard encoder -->
- <encoder name="html"
- class="solr.highlight.HtmlEncoder" />
-
- <!-- Configure the standard fragListBuilder -->
- <fragListBuilder name="simple"
- class="solr.highlight.SimpleFragListBuilder"/>
-
- <!-- Configure the single fragListBuilder -->
- <fragListBuilder name="single"
- class="solr.highlight.SingleFragListBuilder"/>
-
- <!-- Configure the weighted fragListBuilder -->
- <fragListBuilder name="weighted"
- default="true"
- class="solr.highlight.WeightedFragListBuilder"/>
-
- <!-- default tag FragmentsBuilder -->
- <fragmentsBuilder name="default"
- default="true"
- class="solr.highlight.ScoreOrderFragmentsBuilder">
- <!--
- <lst name="defaults">
- <str name="hl.multiValuedSeparatorChar">/</str>
- </lst>
- -->
- </fragmentsBuilder>
-
- <!-- multi-colored tag FragmentsBuilder -->
- <fragmentsBuilder name="colored"
- class="solr.highlight.ScoreOrderFragmentsBuilder">
- <lst name="defaults">
- <str name="hl.tag.pre"><![CDATA[
- <b style="background:yellow">,<b style="background:lawgreen">,
- <b style="background:aquamarine">,<b style="background:magenta">,
- <b style="background:palegreen">,<b style="background:coral">,
- <b style="background:wheat">,<b style="background:khaki">,
- <b style="background:lime">,<b style="background:deepskyblue">]]></str>
- <str name="hl.tag.post"><![CDATA[</b>]]></str>
- </lst>
- </fragmentsBuilder>
-
- <boundaryScanner name="default"
- default="true"
- class="solr.highlight.SimpleBoundaryScanner">
- <lst name="defaults">
- <str name="hl.bs.maxScan">10</str>
- <str name="hl.bs.chars">.,!? 	 </str>
- </lst>
- </boundaryScanner>
-
- <boundaryScanner name="breakIterator"
- class="solr.highlight.BreakIteratorBoundaryScanner">
- <lst name="defaults">
- <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
- <str name="hl.bs.type">WORD</str>
- <!-- language and country are used when constructing Locale object. -->
- <!-- And the Locale object will be used when getting instance of BreakIterator -->
- <str name="hl.bs.language">en</str>
- <str name="hl.bs.country">US</str>
- </lst>
- </boundaryScanner>
- </highlighting>
- </searchComponent>
-
- <!-- Update Processors
-
- Chains of Update Processor Factories for dealing with Update
- Requests can be declared, and then used by name in Update
- Request Processors
-
- http://wiki.apache.org/solr/UpdateRequestProcessor
-
- -->
- <!-- Deduplication
-
- An example dedup update processor that creates the "id" field
- on the fly based on the hash code of some other fields. This
- example has overwriteDupes set to false since we are using the
- id field as the signatureField and Solr will maintain
- uniqueness based on that anyway.
-
- -->
- <!--
- <updateRequestProcessorChain name="dedupe">
- <processor class="solr.processor.SignatureUpdateProcessorFactory">
- <bool name="enabled">true</bool>
- <str name="signatureField">id</str>
- <bool name="overwriteDupes">false</bool>
- <str name="fields">name,features,cat</str>
- <str name="signatureClass">solr.processor.Lookup3Signature</str>
- </processor>
- <processor class="solr.LogUpdateProcessorFactory" />
- <processor class="solr.RunUpdateProcessorFactory" />
- </updateRequestProcessorChain>
- -->
-
- <!-- Language identification
-
- This example update chain identifies the language of the incoming
- documents using the langid contrib. The detected language is
- written to field language_s. No field name mapping is done.
- The fields used for detection are text, title, subject and description,
- making this example suitable for detecting languages form full-text
- rich documents injected via ExtractingRequestHandler.
- See more about langId at http://wiki.apache.org/solr/LanguageDetection
- -->
- <!--
- <updateRequestProcessorChain name="langid">
- <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
- <str name="langid.fl">text,title,subject,description</str>
- <str name="langid.langField">language_s</str>
- <str name="langid.fallback">en</str>
- </processor>
- <processor class="solr.LogUpdateProcessorFactory" />
- <processor class="solr.RunUpdateProcessorFactory" />
- </updateRequestProcessorChain>
- -->
-
- <!-- Script update processor
-
- This example hooks in an update processor implemented using JavaScript.
-
- See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor
- -->
- <!--
- <updateRequestProcessorChain name="script">
- <processor class="solr.StatelessScriptUpdateProcessorFactory">
- <str name="script">update-script.js</str>
- <lst name="params">
- <str name="config_param">example config parameter</str>
- </lst>
- </processor>
- <processor class="solr.RunUpdateProcessorFactory" />
- </updateRequestProcessorChain>
- -->
-
- <!-- Response Writers
-
- http://wiki.apache.org/solr/QueryResponseWriter
-
- Request responses will be written using the writer specified by
- the 'wt' request parameter matching the name of a registered
- writer.
-
- The "default" writer is the default and will be used if 'wt' is
- not specified in the request.
- -->
- <!-- The following response writers are implicitly configured unless
- overridden...
- -->
- <!--
- <queryResponseWriter name="xml"
- default="true"
- class="solr.XMLResponseWriter" />
- <queryResponseWriter name="json" class="solr.JSONResponseWriter"/>
- <queryResponseWriter name="python" class="solr.PythonResponseWriter"/>
- <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>
- <queryResponseWriter name="php" class="solr.PHPResponseWriter"/>
- <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>
- <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/>
- <queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/>
- -->
-
- <queryResponseWriter name="json" class="solr.JSONResponseWriter">
- <!-- For the purposes of the tutorial, JSON responses are written as
- plain text so that they are easy to read in *any* browser.
- If you expect a MIME type of "application/json" just remove this override.
- -->
- <str name="content-type">text/plain; charset=UTF-8</str>
- </queryResponseWriter>
-
- <!--
- Custom response writers can be declared as needed...
- -->
- <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
-
-
- <!-- XSLT response writer transforms the XML output by any xslt file found
- in Solr's conf/xslt directory. Changes to xslt files are checked for
- every xsltCacheLifetimeSeconds.
- -->
- <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
- <int name="xsltCacheLifetimeSeconds">5</int>
- </queryResponseWriter>
-
- <!-- Query Parsers
-
- http://wiki.apache.org/solr/SolrQuerySyntax
-
- Multiple QParserPlugins can be registered by name, and then
- used in either the "defType" param for the QueryComponent (used
- by SearchHandler) or in LocalParams
- -->
- <!-- example of registering a query parser -->
- <!--
- <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/>
- -->
-
- <!-- Function Parsers
-
- http://wiki.apache.org/solr/FunctionQuery
-
- Multiple ValueSourceParsers can be registered by name, and then
- used as function names when using the "func" QParser.
- -->
- <!-- example of registering a custom function parser -->
- <!--
- <valueSourceParser name="myfunc"
- class="com.mycompany.MyValueSourceParser" />
- -->
-
-
- <!-- Document Transformers
- http://wiki.apache.org/solr/DocTransformers
- -->
- <!--
- Could be something like:
- <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" >
- <int name="connection">jdbc://....</int>
- </transformer>
-
- To add a constant value to all docs, use:
- <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
- <int name="value">5</int>
- </transformer>
-
- If you want the user to still be able to change it with _value:something_ use this:
- <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
- <double name="defaultValue">5</double>
- </transformer>
-
- If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The
- EditorialMarkerFactory will do exactly that:
- <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" />
- -->
-
-
- <!-- Legacy config for the admin interface -->
- <admin>
- <defaultQuery>*:*</defaultQuery>
- </admin>
-
-</config>
+++ /dev/null
-pizza
-history
\ No newline at end of file
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+++ /dev/null
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-#some test synonym mappings unlikely to appear in real input text
-aaafoo => aaabar
-bbbfoo => bbbfoo bbbbar
-cccfoo => cccbar cccbaz
-fooaaa,baraaa,bazaaa
-
-# Some synonym groups specific to this example
-GB,gib,gigabyte,gigabytes
-MB,mib,megabyte,megabytes
-Television, Televisions, TV, TVs
-#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
-#after us won't split it into two words.
-
-# Synonym mappings can be used for spelling correction too
-pixima => pixma
-
+++ /dev/null
-/*
- This is a basic skeleton JavaScript update processor.
-
- In order for this to be executed, it must be properly wired into solrconfig.xml; by default it is commented out in
- the example solrconfig.xml and must be uncommented to be enabled.
-
- See http://wiki.apache.org/solr/ScriptUpdateProcessor for more details.
-*/
-
-function processAdd(cmd) {
-
- doc = cmd.solrDoc; // org.apache.solr.common.SolrInputDocument
- id = doc.getFieldValue("id");
- logger.info("update-script#processAdd: id=" + id);
-
-// Set a field value:
-// doc.setField("foo_s", "whatever");
-
-// Get a configuration parameter:
-// config_param = params.get('config_param'); // "params" only exists if processor configured with <lst name="params">
-
-// Get a request parameter:
-// some_param = req.getParams().get("some_param")
-
-// Add a field of field names that match a pattern:
-// - Potentially useful to determine the fields/attributes represented in a result set, via faceting on field_name_ss
-// field_names = doc.getFieldNames().toArray();
-// for(i=0; i < field_names.length; i++) {
-// field_name = field_names[i];
-// if (/attr_.*/.test(field_name)) { doc.addField("attribute_ss", field_names[i]); }
-// }
-
-}
-
-function processDelete(cmd) {
- // no-op
-}
-
-function processMergeIndexes(cmd) {
- // no-op
-}
-
-function processCommit(cmd) {
- // no-op
-}
-
-function processRollback(cmd) {
- // no-op
-}
-
-function finish() {
- // no-op
-}
+++ /dev/null
-Introduction
-------------
-Solr Search Velocity Templates
-
-A quick demo of using Solr using http://wiki.apache.org/solr/VelocityResponseWriter
-
-You typically access these templates via:
- http://localhost:8983/solr/collection1/browse
-
-It's called "browse" because you can click around with your mouse
-without needing to type any search terms. And of course it
-also works as a standard search app as well.
-
-Known Limitations
------------------
-* The /browse and the VelocityResponseWriter component
- serve content directly from Solr, which usually requires
- Solr's HTTP API to be exposed. Advanced users could
- potentially access other parts of Solr directly.
-* There are some hard coded fields in these templates.
- Since these templates live under conf, they should be
- considered part of the overall configuration, and
- must be coordinated with schema.xml and solrconfig.xml
-
-Velocity Info
--------------
-Java-based template language.
-
-It's nice in this context because change to the templates
-are immediately visible in browser on the next visit.
-
-Links:
- http://velocity.apache.org
- http://wiki.apache.org/velocity/
- http://velocity.apache.org/engine/releases/velocity-1.7/user-guide.html
-
-
-File List
----------
-
-System and Misc:
- VM_global_library.vm - Macros used other templates,
- exact filename is important for Velocity to see it
- error.vm - shows errors, if any
- debug.vm - includes toggle links for "explain" and "all fields"
- activated by debug link in footer.vm
- README.txt - this file
-
-Overall Page Composition:
- browse.vm - Main entry point into templates
- layout.vm - overall HTML page layout
- head.vm - elements in the <head> section of the HTML document
- header.vm - top section of page visible to users
- footer.vm - bottom section of page visible to users,
- includes debug and help links
- main.css - CSS style for overall pages
- see also jquery.autocomplete.css
-
-Query Form and Options:
- query_form.vm - renders query form
- query_group.vm - group by fields
- e.g.: Manufacturer or Poplularity
- query_spatial.vm - select box for location based Geospacial search
-
-Spelling Suggestions:
- did_you_mean.vm - hyperlinked spelling suggestions in results
- suggest.vm - dynamic spelling suggestions
- as you type in the search form
- jquery.autocomplete.js - supporting files for dynamic suggestions
- jquery.autocomplete.css - Most CSS is defined in main.css
-
-
-Search Results, General:
- (see also browse.vm)
- tabs.vm - provides navigation to advanced search options
- pagination_top.vm - paging and staticis at top of results
- pagination_bottom.vm - paging and staticis at bottom of results
- results_list.vm
- hit.vm - called for each matching doc,
- decides which template to use
- hit_grouped.vm - display results grouped by field values
- product_doc.vm - display a Product
- join_doc.vm - display a joined document
- richtext_doc.vm - display a complex/misc. document
- hit_plain.vm - basic display of all fields,
- edit results_list.vm to enable this
-
-
-Search Results, Facets & Clusters:
- facets.vm - calls the 4 facet and 1 cluster template
- facet_fields.vm - display facets based on field values
- e.g.: fields specified by &facet.field=
- facet_queries.vm - display facets based on specific facet queries
- e.g.: facets specified by &facet.query=
- facet_ranges.vm - display facets based on ranges
- e.g.: ranges specified by &facet.range=
- facet_pivot.vm - display pivot based facets
- e.g.: facets specified by &facet.pivot=
- cluster.vm - if clustering is available
- then call cluster_results.vm
- cluster_results.vm - actual rendering of clusters
+++ /dev/null
-#**
- * Global macros used by other templates.
- * This file must be named VM_global_library.vm
- * in order for Velocity to find it.
- *#
-
-#macro(param $key)$request.params.get($key)#end
-
-#macro(url_root)/solr#end
-
-## TODO: s/url_for_solr/url_for_core/ and s/url_root/url_for_solr/
-#macro(core_name)$request.core.name#end
-#macro(url_for_solr)#{url_root}#if($request.core.name != "")/$request.core.name#end#end
-#macro(url_for_home)#url_for_solr/browse#end
-
-#macro(q)&q=$!{esc.url($params.get('q'))}#end
-
-#macro(fqs $p)#foreach($fq in $p)#if($velocityCount>1)&#{end}fq=$esc.url($fq)#end#end
-
-#macro(debug)#if($request.params.get('debugQuery'))&debugQuery=true#end#end
-
-#macro(boostPrice)#if($request.params.get('bf') == 'price')&bf=price#end#end
-
-#macro(annotate)#if($request.params.get('annotateBrowse'))&annotateBrowse=true#end#end
-
-#macro(annTitle $msg)#if($annotate == true)title="$msg"#end#end
-
-#macro(spatial)#if($request.params.get('sfield'))&sfield=store#end#if($request.params.get('pt'))&pt=$request.params.get('pt')#end#if($request.params.get('d'))&d=$request.params.get('d')#end#end
-
-#macro(qOpts)#set($queryOpts = $request.params.get("queryOpts"))#if($queryOpts && $queryOpts != "")&queryOpts=$queryOpts#end#end
-
-#macro(group)#if($request.params.getBool("group") == true)&group=true#end#if($request.params.get("group.field"))#foreach($grp in $request.params.getParams('group.field'))&group.field=$grp#end#end#end
-
-#macro(sort $p)#if($p)#foreach($s in $p)&sort=$esc.url($s)#end#end#end
-
-#macro(lensNoQ)?#if($request.params.getParams('fq') and $list.size($request.params.getParams('fq')) > 0)&#fqs($request.params.getParams('fq'))#end#sort($request.params.getParams('sort'))#debug#boostPrice#annotate#spatial#qOpts#group#end
-#macro(lens)#lensNoQ#q#end
-
-
-#macro(url_for_lens)#{url_for_home}#lens#end
-
-#macro(url_for_start $start)#url_for_home#lens&start=$start#end
-
-#macro(url_for_filters $p)#url_for_home?#q#boostPrice#spatial#qOpts#if($list.size($p) > 0)&#fqs($p)#end#debug#end
-
-#macro(url_for_nested_facet_query $field)#url_for_home#lens&fq=$esc.url($field)#end
-
-## TODO: convert to use {!raw f=$field}$value (with escaping of course)
-#macro(url_for_facet_filter $field $value)#url_for_home#lens&fq=$esc.url($field):%22$esc.url($value)%22#end
-
-#macro(url_for_facet_date_filter $field $value)#url_for_home#lens&fq=$esc.url($field):$esc.url($value)#end
-
-#macro(url_for_facet_range_filter $field $value)#url_for_home#lens&fq=$esc.url($field):$esc.url($value)#end
-
-
-#macro(link_to_previous_page $text)
- #if($page.current_page_number > 1)
- #set($prev_start = $page.start - $page.results_per_page)
- <a class="prev-page" href="#url_for_start($prev_start)">$text</a>
- #end
-#end
-
-#macro(link_to_next_page $text)
- #if($page.current_page_number < $page.page_count)
- #set($next_start = $page.start + $page.results_per_page)
- <a class="next-page" href="#url_for_start($next_start)">$text</a>
- #end
-#end
-
-#macro(link_to_page $page_number $text)
- #if($page_number == $page.current_page_number)
- $text
- #else
- #if($page_number <= $page.page_count)
- #set($page_start = $page_number * $page.results_per_page - $page.results_per_page)
- <a class="page" href="#url_for_start($page_start)">$text</a>
- #end
- #end
-#end
-
-#macro(display_facet_query $field, $display, $fieldName)
- #if($field.size() > 0)
- <span class="facet-field">$display</span>
- <ul>
- #foreach ($facet in $field)
- #if ($facet.value > 0)
- #set($facetURL = "#url_for_nested_facet_query($facet.key)")
- #if ($facetURL != '')
- <li><a href="$facetURL">$facet.key</a> ($facet.value)</li>
- #end
- #end
- #end
- </ul>
- #end
-#end
-
-
-#macro(display_facet_range $field, $display, $fieldName, $start, $end, $gap, $before, $after)
- <span class="facet-field">$display</span>
- <ul>
- #if($before && $before != "")
- #set($value = "[* TO " + "#format_value($start)" + "}")
- #set($facetURL = "#url_for_facet_range_filter($fieldName, $value)")
- <li><a href="$facetURL">Less than #format_value($start)</a> ($before)</li>
- #end
- #foreach ($facet in $field)
- #set($rangeEnd = "#range_get_to_value($facet.key, $gap)")
- #set($value = "[" + $facet.key + " TO " + $rangeEnd + "}")
- #set($facetURL = "#url_for_facet_range_filter($fieldName, $value)")
- #if ($facetURL != '')
- <li><a href="$facetURL">$facet.key - #format_value($rangeEnd)</a> ($facet.value)</li>
- #end
- #end
- #if($end && $end != "" && $after > 0)
- #set($value = "[" + "#format_value($end)" + " TO *}")
- #set($facetURL = "#url_for_facet_range_filter($fieldName, $value)")
- <li><a href="$facetURL">More than #format_value($end)</a> ($after)</li>
- #end
- </ul>
-#end
-
-## $pivots is a list of facet_pivot
-#macro(display_facet_pivot $pivots, $display)
- #if($pivots.size() > 0)
- <span class="facet-field">$display</span>
- <ul>
- #foreach ($pivot in $pivots)
- #foreach ($entry in $pivot.value)
- <a href="#url_for_facet_filter($entry.field, $entry.value)">$entry.field::$entry.value</a> ($entry.count)
- <ul>
- #foreach($nest in $entry.pivot)
- <a href="#url_for_facet_filter($entry.field, $entry.value)&fq=$esc.url($nest.field):%22$esc.url($nest.value)%22">$nest.field::$nest.value</a> ($nest.count)
- #end
- </ul>
- #end
- #end
- </ul>
- #end
-#end
-
-#macro(field $f)
- #if($response.response.highlighting.get($docId).get($f).get(0))
- #set($pad = "")
- #foreach($v in $response.response.highlighting.get($docId).get($f))
-$pad$v##
- #set($pad = " ... ")
- #end
- #else
- #foreach($v in $doc.getFieldValues($f))
-$v##
- #end
- #end
-#end
-
-#macro(utc_date $theDate)
-$date.format("yyyy-MM-dd'T'HH:mm:ss'Z'",$theDate,$date.getLocale(),$date.getTimeZone().getTimeZone("UTC"))##
-#end
-
-#macro(format_value $val)
-#if(${val.class.name} == "java.util.Date")
-#utc_date($val)##
-#else
-$val##
-#end
-#end
-
-#macro(range_get_to_value $inval, $gapval)
-#if(${gapval.class.name} == "java.lang.String")
-$inval$gapval##
-#elseif(${gapval.class.name} == "java.lang.Float" || ${inval.class.name} == "java.lang.Float")
-$math.toDouble($math.add($inval,$gapval))##
-#else
-$math.add($inval,$gapval)##
-#end
-#end
+++ /dev/null
-#**
- * Main entry point into the /browse templates
- *#
-
-#set($searcher = $request.searcher)
-#set($params = $request.params)
-#set($clusters = $response.response.clusters)
-#set($mltResults = $response.response.get("moreLikeThis"))
-#set($annotate = $params.get("annotateBrowse"))
-#parse('query_form.vm')
-#parse('did_you_mean.vm')
-
-<div class="navigators">
- #parse("facets.vm")
-</div>
-
-<div class="pagination">
- #parse("pagination_top.vm")
-</div>
-
-## Show Error Message, if any
-<div class="error">
- #parse("error.vm")
-</div>
-
-## Render Results, actual matching docs
-<div class="results">
- #parse("results_list.vm")
-</div>
-
-<div class="pagination">
- #parse("pagination_bottom.vm")
-</div>
+++ /dev/null
-#**
- * Check if Clustering is Enabled and then
- * call cluster_results.vm
- *#
-
-<h2 #annTitle("Clusters generated by Carrot2 using the /clustering RequestHandler")>
- Clusters
-</h2>
-
-## Div tag has placeholder text by default
-<div id="clusters">
- Run Solr with java -Dsolr.clustering.enabled=true -jar start.jar to see results
-</div>
-
-## Replace the div content *if* Carrot^2 is available
-<script type="text/javascript">
- $('#clusters').load("#url_for_solr/clustering#lens",
- {'wt':'velocity', 'v.template':"cluster_results"});
-</script>
+++ /dev/null
-#**
- * Actual rendering of Clusters
- *#
-
-## For each cluster
-#foreach ($clusters in $response.response.clusters)
-
- #set($labels = $clusters.get('labels'))
- #set($docs = $clusters.get('docs'))
-
- ## This Cluster's Heading
- <h3>
- #foreach ($label in $labels)
- ## Keep the following line together to prevent
- ## a space appearing before each comma
- $label#if( $foreach.hasNext ),#end
- #end
- </h3>
-
- ## This Cluster's Documents
- <ol>
- ## For each doc in this cluster
- #foreach ($cluDoc in $docs)
- <li>
- <a href="#url_for_home?q=id:$cluDoc">
- $cluDoc</a>
- </li>
- #end
- </ol>
-
-#end ## end for each Cluster
+++ /dev/null
-#**
- * Show Debugging Information, if enabled
- *#
-
-#if( $params.getBool("debugQuery",false) )
- <a href="#" onclick='jQuery(this).siblings("pre").toggle(); return false;'>
- toggle explain</a>
-
- <pre style="display:none">
- $response.getExplainMap().get($doc.getFirstValue('id'))
- </pre>
-
- <a href="#" onclick='jQuery(this).siblings("pre2").toggle(); return false;'>
- toggle all fields</a>
-
- <pre2 style="display:none">
- #foreach($fieldname in $doc.fieldNames)
- <br>
- <span class="field-name">$fieldname :</span>
- <span>
- #foreach($value in $doc.getFieldValues($fieldname))
- $esc.html($value)
- #end
- </span>
- </br>
- #end
- </pre2>
-#end
+++ /dev/null
-#**
- * Hyperlinked spelling suggestions in results list
- *#
-
-#set($dym = $response.response.spellcheck.suggestions.collation.collationQuery)
-#if($dym)
- Did you mean
- <a href="#{url_for_home}#{lensNoQ}&q=$esc.url($dym)">$esc.html($dym)</a>?
-#end
+++ /dev/null
-#**
- * Show Error Message, if any
- *#
-
-## Show Error Message, if any
-## Usually rendered inside div class=error
-
-#if( $response.response.error.code )
- <h1>ERROR $response.response.error.code</h1>
- $response.response.error.msg
-#end
+++ /dev/null
-#**
- * Display facets based on field values
- * e.g.: fields specified by &facet.field=
- *#
-
-#if($response.facetFields)
- <h2 #annTitle("Facets generated by adding &facet.field= to the request")>
- Field Facets
- </h2>
- #foreach($field in $response.facetFields)
- ## Hide facets without value
- #if($field.values.size() > 0)
- <span class="facet-field">$field.name</span>
- <ul>
- #foreach($facet in $field.values)
- <li>
- <a href="#url_for_facet_filter($field.name, $facet.name)">$facet.name</a> ($facet.count)
- </li>
- #end
- </ul>
- #end ## end if > 0
- #end ## end for each facet field
-#end ## end if response has facet fields
+++ /dev/null
-#**
- * Display Pivot-Based Facets
- * e.g.: facets specified by &facet.pivot=
- *#
-
-<h2 #annTitle("Facets generated by adding &facet.pivot= to the request")>
- Pivot Facets
-</h2>
-
-#set($pivot = $response.response.facet_counts.facet_pivot)
-
-#display_facet_pivot($pivot, "")
+++ /dev/null
-#**
- * Display facets based on specific facet queries
- * e.g.: facets specified by &facet.query=
- *#
-
-#set($field = $response.response.facet_counts.facet_queries)
-
-<h2 #annTitle("Facets generated by adding &facet.query= to the request")>
- Query Facets
-</h2>
-
-#display_facet_query($field, "", "")
+++ /dev/null
-#**
- * Display facets based on ranges of values, AKA "Bukets"
- * e.g.: ranges specified by &facet.range=
- *#
-
-<h2 #annTitle("Facets generated by adding &facet.range= to the request")>
- Range Facets
-</h2>
-
-#foreach ($field in $response.response.facet_counts.facet_ranges)
- ## Hide facets without value
- #if($field.value.counts.size() > 0)
- #set($name = $field.key)
- #set($display = $name)
- #set($f = $field.value.counts)
- #set($start = $field.value.start)
- #set($end = $field.value.end)
- #set($gap = $field.value.gap)
- #set($before = $field.value.before)
- #set($after = $field.value.after)
- #display_facet_range($f, $display, $name, $start, $end, $gap, $before, $after)
- #end ## end if has any values
-#end ## end for each facet range
+++ /dev/null
-#**
- * Overall Facet display block
- * Invokes the 4 facet and 1 cluster template
- *#
-
-#parse('facet_fields.vm')
-#parse('facet_queries.vm')
-#parse('facet_ranges.vm')
-#parse('facet_pivot.vm')
-#parse('cluster.vm')
+++ /dev/null
-#**
- * Render the bottom section of the page visible to users
- *#
-
-<hr/>
-<div>
- <span>Options:</span>
-
- #if($request.params.get('debugQuery'))
- <a href="#url_for_home?#q#if($list.size($request.params.getParams('fq')) > 0)&#fqs($request.params.getParams('fq'))#end">
- disable debug</a>
- #else
- <a href="#url_for_lens&debugQuery=true&fl=*,score">
- enable debug</a>
- #end
- -
- #if($annotate)
- <a href="#url_for_home?#q#if($list.size($request.params.getParams('fq')) > 0)&#fqs($request.params.getParams('fq'))#end#boostPrice">
- disable annotation</a>
- #else
- <a href="#url_for_lens&annotateBrowse=true">
- enable annotation</a>
- #end
- -
- <a #annTitle("Click to switch to an XML response: &wt=xml") href="#url_for_lens&wt=xml#if($request.params.get('debugQuery'))&debugQuery=true#end">
- XML results</a>
-
-</div>
-
-<div>
- Generated by <a href="http://wiki.apache.org/solr/VelocityResponseWriter">VelocityResponseWriter</a>
-</div>
-<div>
- <span>Documentation: </span>
- <a href="http://lucene.apache.org/solr">Solr Home Page</a>, <a href="http://wiki.apache.org/solr">
- Solr Wiki</a>
- </div>
-<div>
- Disclaimer:
- The locations displayed in this demonstration are purely fictional.
- It is more than likely that no store with the items listed actually
- exists at that location!
-</div>
+++ /dev/null
-#**
- * Provide elements for the <head> section of the HTML document
- *#
-
- ## An example of using an arbitrary request parameter
- <title>#param('title')</title>
- <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
-
- <script type="text/javascript" src="#{url_root}/js/lib/jquery-1.7.2.min.js"></script>
- <link rel="stylesheet" type="text/css" href="#{url_for_solr}/admin/file?file=/velocity/main.css&contentType=text/css"/>
- <link rel="stylesheet" href="#{url_for_solr}/admin/file?file=/velocity/jquery.autocomplete.css&contentType=text/css" type="text/css" />
- <script type="text/javascript" src="#{url_for_solr}/admin/file?file=/velocity/jquery.autocomplete.js&contentType=text/javascript"></script>
-
-
- <script>
- $(document).ready(function(){
- $("\#q").autocomplete('#{url_for_solr}/terms', { ## backslash escaped #q as that is a macro defined in VM_global_library.vm
- extraParams:{
- 'terms.prefix': function() { return $("\#q").val();},
- 'terms.sort': 'count',
- 'terms.fl': 'name',
- 'wt': 'velocity',
- 'v.template': 'suggest'
- }
- }
- ).keydown(function(e){
- if (e.keyCode === 13){
- $("#query-form").trigger('submit');
- }
- });
-
- // http://localhost:8983/solr/collection1/terms?terms.fl=name&terms.prefix=i&terms.sort=count&wt=velocity&v.template=suggest
- });
-
- </script>
+++ /dev/null
-#**
- * Render the top section of the page visible to users
- *#
-
-<div id="head">
- <span ><a href="#url_for_home#if($request.params.get('debugQuery'))?debugQuery=true#end"><img src="#{url_root}/img/solr.png" id="logo"/></a></span>
-</div>
+++ /dev/null
-#**
- * Called for each matching document but then
- * calls one of product_doc, join_doc or richtext_doc
- * depending on which fields the doc has
- *#
-
-#set($docId = $doc.getFieldValue('id'))
-
-<div class="result-document">
-
- ## Has a "name" field ?
- #if($doc.getFieldValue('name'))
- #parse("product_doc.vm")
-
- ## Has a "compName_s" field ?
- #elseif($doc.getFieldValue('compName_s'))
- #parse("join_doc.vm")
-
- ## Fallback to richtext_doc
- #else
- #parse("richtext_doc.vm")
-
- #end
-
-</div>
+++ /dev/null
-#**
- * Display grouped results
- *#
-
-<div class="result-document">
-
- <div class="result-title">
- <b>$grouping.key</b>
- </div>
-
- <div>
- Total Matches in Group: $grouping.value.matches
- </div>
-
- <div> ## list of groups
-
- #foreach ($group in $grouping.value.groups)
- <div class="group-value">
- #if($group.groupValue)$group.groupValue#{else}<i>No group</i>#end
- <span #annTitle("The count of the number of documents in this group")>
- ($group.doclist.numFound)
- </span>
- </div>
-
- <div class="group-doclist"
- #annTitle("Contains the top scoring documents in the group")
- >
- #foreach ($doc in $group.doclist)
- #set($docId = $doc.getFieldValue('id'))
- #if($doc.getFieldValue('name'))
- #parse("product_doc.vm")
- #elseif($doc.getFieldValue('compName_s'))
- #parse("join_doc.vm")
- #else
- #parse("richtext_doc.vm")
- #end
- #end
- </div>
-
- #end ## end of foreach group in grouping.value.groups
- </div> ## div tag for entire list of groups
-
-</div> ## end of div class=result-document
+++ /dev/null
-#**
- * An extremely plain / debug version of hit.vm
- *#
-
-<table>
- ## For each field
- #foreach( $fieldName in $doc.fieldNames )
- ## For each value
- #foreach( $value in $doc.getFieldValues($fieldName) )
- <tr>
- ## Field Name
- <th align="right" valign="top">
- #if( $foreach.count == 1 )
- $fieldName:
- #end
- </th>
- ## Field Value(s)
- <td align="left" valign="top">
- $esc.html($value) <br/>
- </td>
- </tr>
- #end ## end for each value
- #end ## end for each field
-</table>
-<hr/>
+++ /dev/null
-#**
- * Display documents that are joined to other documents
- *#
-
-<div class="result-title">
- <b>#field('compName_s')</b>
-</div>
-
-<div>
- Id: #field('id')
- (company-details document for
- <a href="http://wiki.apache.org/solr/Join" target="_new">join</a>
- )
-</div>
-
-<div>
- Address: #field('address_s')
-</div>
-
-#parse('debug.vm')
+++ /dev/null
-.ac_results {
- padding: 0px;
- border: 1px solid black;
- background-color: white;
- overflow: hidden;
- z-index: 99999;
-}
-
-.ac_results ul {
- width: 100%;
- list-style-position: outside;
- list-style: none;
- padding: 0;
- margin: 0;
-}
-
-.ac_results li {
- margin: 0px;
- padding: 2px 5px;
- cursor: default;
- display: block;
- /*
- if width will be 100% horizontal scrollbar will apear
- when scroll mode will be used
- */
- /*width: 100%;*/
- font: menu;
- font-size: 12px;
- /*
- it is very important, if line-height not setted or setted
- in relative units scroll will be broken in firefox
- */
- line-height: 16px;
- overflow: hidden;
-}
-
-.ac_loading {
- background: white url('indicator.gif') right center no-repeat;
-}
-
-.ac_odd {
- background-color: #eee;
-}
-
-.ac_over {
- background-color: #0A246A;
- color: white;
-}
+++ /dev/null
-/*
- * Autocomplete - jQuery plugin 1.1pre
- *
- * Copyright (c) 2007 Dylan Verheul, Dan G. Switzer, Anjesh Tuladhar, Jörn Zaefferer
- *
- * Dual licensed under the MIT and GPL licenses:
- * http://www.opensource.org/licenses/mit-license.php
- * http://www.gnu.org/licenses/gpl.html
- *
- * Revision: $Id: jquery.autocomplete.js 5785 2008-07-12 10:37:33Z joern.zaefferer $
- *
- */
-
-;(function($) {
-
-$.fn.extend({
- autocomplete: function(urlOrData, options) {
- var isUrl = typeof urlOrData == "string";
- options = $.extend({}, $.Autocompleter.defaults, {
- url: isUrl ? urlOrData : null,
- data: isUrl ? null : urlOrData,
- delay: isUrl ? $.Autocompleter.defaults.delay : 10,
- max: options && !options.scroll ? 10 : 150
- }, options);
-
- // if highlight is set to false, replace it with a do-nothing function
- options.highlight = options.highlight || function(value) { return value; };
-
- // if the formatMatch option is not specified, then use formatItem for backwards compatibility
- options.formatMatch = options.formatMatch || options.formatItem;
-
- return this.each(function() {
- new $.Autocompleter(this, options);
- });
- },
- result: function(handler) {
- return this.bind("result", handler);
- },
- search: function(handler) {
- return this.trigger("search", [handler]);
- },
- flushCache: function() {
- return this.trigger("flushCache");
- },
- setOptions: function(options){
- return this.trigger("setOptions", [options]);
- },
- unautocomplete: function() {
- return this.trigger("unautocomplete");
- }
-});
-
-$.Autocompleter = function(input, options) {
-
- var KEY = {
- UP: 38,
- DOWN: 40,
- DEL: 46,
- TAB: 9,
- RETURN: 13,
- ESC: 27,
- COMMA: 188,
- PAGEUP: 33,
- PAGEDOWN: 34,
- BACKSPACE: 8
- };
-
- // Create $ object for input element
- var $input = $(input).attr("autocomplete", "off").addClass(options.inputClass);
-
- var timeout;
- var previousValue = "";
- var cache = $.Autocompleter.Cache(options);
- var hasFocus = 0;
- var lastKeyPressCode;
- var config = {
- mouseDownOnSelect: false
- };
- var select = $.Autocompleter.Select(options, input, selectCurrent, config);
-
- var blockSubmit;
-
- // prevent form submit in opera when selecting with return key
- $.browser.opera && $(input.form).bind("submit.autocomplete", function() {
- if (blockSubmit) {
- blockSubmit = false;
- return false;
- }
- });
-
- // only opera doesn't trigger keydown multiple times while pressed, others don't work with keypress at all
- $input.bind(($.browser.opera ? "keypress" : "keydown") + ".autocomplete", function(event) {
- // track last key pressed
- lastKeyPressCode = event.keyCode;
- switch(event.keyCode) {
-
- case KEY.UP:
- event.preventDefault();
- if ( select.visible() ) {
- select.prev();
- } else {
- onChange(0, true);
- }
- break;
-
- case KEY.DOWN:
- event.preventDefault();
- if ( select.visible() ) {
- select.next();
- } else {
- onChange(0, true);
- }
- break;
-
- case KEY.PAGEUP:
- event.preventDefault();
- if ( select.visible() ) {
- select.pageUp();
- } else {
- onChange(0, true);
- }
- break;
-
- case KEY.PAGEDOWN:
- event.preventDefault();
- if ( select.visible() ) {
- select.pageDown();
- } else {
- onChange(0, true);
- }
- break;
-
- // matches also semicolon
- case options.multiple && $.trim(options.multipleSeparator) == "," && KEY.COMMA:
- case KEY.TAB:
- case KEY.RETURN:
- if( selectCurrent() ) {
- // stop default to prevent a form submit, Opera needs special handling
- event.preventDefault();
- blockSubmit = true;
- return false;
- }
- break;
-
- case KEY.ESC:
- select.hide();
- break;
-
- default:
- clearTimeout(timeout);
- timeout = setTimeout(onChange, options.delay);
- break;
- }
- }).focus(function(){
- // track whether the field has focus, we shouldn't process any
- // results if the field no longer has focus
- hasFocus++;
- }).blur(function() {
- hasFocus = 0;
- if (!config.mouseDownOnSelect) {
- hideResults();
- }
- }).click(function() {
- // show select when clicking in a focused field
- if ( hasFocus++ > 1 && !select.visible() ) {
- onChange(0, true);
- }
- }).bind("search", function() {
- // TODO why not just specifying both arguments?
- var fn = (arguments.length > 1) ? arguments[1] : null;
- function findValueCallback(q, data) {
- var result;
- if( data && data.length ) {
- for (var i=0; i < data.length; i++) {
- if( data[i].result.toLowerCase() == q.toLowerCase() ) {
- result = data[i];
- break;
- }
- }
- }
- if( typeof fn == "function" ) fn(result);
- else $input.trigger("result", result && [result.data, result.value]);
- }
- $.each(trimWords($input.val()), function(i, value) {
- request(value, findValueCallback, findValueCallback);
- });
- }).bind("flushCache", function() {
- cache.flush();
- }).bind("setOptions", function() {
- $.extend(options, arguments[1]);
- // if we've updated the data, repopulate
- if ( "data" in arguments[1] )
- cache.populate();
- }).bind("unautocomplete", function() {
- select.unbind();
- $input.unbind();
- $(input.form).unbind(".autocomplete");
- });
-
-
- function selectCurrent() {
- var selected = select.selected();
- if( !selected )
- return false;
-
- var v = selected.result;
- previousValue = v;
-
- if ( options.multiple ) {
- var words = trimWords($input.val());
- if ( words.length > 1 ) {
- v = words.slice(0, words.length - 1).join( options.multipleSeparator ) + options.multipleSeparator + v;
- }
- v += options.multipleSeparator;
- }
-
- $input.val(v);
- hideResultsNow();
- $input.trigger("result", [selected.data, selected.value]);
- return true;
- }
-
- function onChange(crap, skipPrevCheck) {
- if( lastKeyPressCode == KEY.DEL ) {
- select.hide();
- return;
- }
-
- var currentValue = $input.val();
-
- if ( !skipPrevCheck && currentValue == previousValue )
- return;
-
- previousValue = currentValue;
-
- currentValue = lastWord(currentValue);
- if ( currentValue.length >= options.minChars) {
- $input.addClass(options.loadingClass);
- if (!options.matchCase)
- currentValue = currentValue.toLowerCase();
- request(currentValue, receiveData, hideResultsNow);
- } else {
- stopLoading();
- select.hide();
- }
- };
-
- function trimWords(value) {
- if ( !value ) {
- return [""];
- }
- var words = value.split( options.multipleSeparator );
- var result = [];
- $.each(words, function(i, value) {
- if ( $.trim(value) )
- result[i] = $.trim(value);
- });
- return result;
- }
-
- function lastWord(value) {
- if ( !options.multiple )
- return value;
- var words = trimWords(value);
- return words[words.length - 1];
- }
-
- // fills in the input box w/the first match (assumed to be the best match)
- // q: the term entered
- // sValue: the first matching result
- function autoFill(q, sValue){
- // autofill in the complete box w/the first match as long as the user hasn't entered in more data
- // if the last user key pressed was backspace, don't autofill
- if( options.autoFill && (lastWord($input.val()).toLowerCase() == q.toLowerCase()) && lastKeyPressCode != KEY.BACKSPACE ) {
- // fill in the value (keep the case the user has typed)
- $input.val($input.val() + sValue.substring(lastWord(previousValue).length));
- // select the portion of the value not typed by the user (so the next character will erase)
- $.Autocompleter.Selection(input, previousValue.length, previousValue.length + sValue.length);
- }
- };
-
- function hideResults() {
- clearTimeout(timeout);
- timeout = setTimeout(hideResultsNow, 200);
- };
-
- function hideResultsNow() {
- var wasVisible = select.visible();
- select.hide();
- clearTimeout(timeout);
- stopLoading();
- if (options.mustMatch) {
- // call search and run callback
- $input.search(
- function (result){
- // if no value found, clear the input box
- if( !result ) {
- if (options.multiple) {
- var words = trimWords($input.val()).slice(0, -1);
- $input.val( words.join(options.multipleSeparator) + (words.length ? options.multipleSeparator : "") );
- }
- else
- $input.val( "" );
- }
- }
- );
- }
- if (wasVisible)
- // position cursor at end of input field
- $.Autocompleter.Selection(input, input.value.length, input.value.length);
- };
-
- function receiveData(q, data) {
- if ( data && data.length && hasFocus ) {
- stopLoading();
- select.display(data, q);
- autoFill(q, data[0].value);
- select.show();
- } else {
- hideResultsNow();
- }
- };
-
- function request(term, success, failure) {
- if (!options.matchCase)
- term = term.toLowerCase();
- var data = cache.load(term);
- data = null; // Avoid buggy cache and go to Solr every time
- // recieve the cached data
- if (data && data.length) {
- success(term, data);
- // if an AJAX url has been supplied, try loading the data now
- } else if( (typeof options.url == "string") && (options.url.length > 0) ){
-
- var extraParams = {
- timestamp: +new Date()
- };
- $.each(options.extraParams, function(key, param) {
- extraParams[key] = typeof param == "function" ? param() : param;
- });
-
- $.ajax({
- // try to leverage ajaxQueue plugin to abort previous requests
- mode: "abort",
- // limit abortion to this input
- port: "autocomplete" + input.name,
- dataType: options.dataType,
- url: options.url,
- data: $.extend({
- q: lastWord(term),
- limit: options.max
- }, extraParams),
- success: function(data) {
- var parsed = options.parse && options.parse(data) || parse(data);
- cache.add(term, parsed);
- success(term, parsed);
- }
- });
- } else {
- // if we have a failure, we need to empty the list -- this prevents the the [TAB] key from selecting the last successful match
- select.emptyList();
- failure(term);
- }
- };
-
- function parse(data) {
- var parsed = [];
- var rows = data.split("\n");
- for (var i=0; i < rows.length; i++) {
- var row = $.trim(rows[i]);
- if (row) {
- row = row.split("|");
- parsed[parsed.length] = {
- data: row,
- value: row[0],
- result: options.formatResult && options.formatResult(row, row[0]) || row[0]
- };
- }
- }
- return parsed;
- };
-
- function stopLoading() {
- $input.removeClass(options.loadingClass);
- };
-
-};
-
-$.Autocompleter.defaults = {
- inputClass: "ac_input",
- resultsClass: "ac_results",
- loadingClass: "ac_loading",
- minChars: 1,
- delay: 400,
- matchCase: false,
- matchSubset: true,
- matchContains: false,
- cacheLength: 10,
- max: 100,
- mustMatch: false,
- extraParams: {},
- selectFirst: false,
- formatItem: function(row) { return row[0]; },
- formatMatch: null,
- autoFill: false,
- width: 0,
- multiple: false,
- multipleSeparator: ", ",
- highlight: function(value, term) {
- return value.replace(new RegExp("(?![^&;]+;)(?!<[^<>]*)(" + term.replace(/([\^\$\(\)\[\]\{\}\*\.\+\?\|\\])/gi, "\\$1") + ")(?![^<>]*>)(?![^&;]+;)", "gi"), "<strong>$1</strong>");
- },
- scroll: true,
- scrollHeight: 180
-};
-
-$.Autocompleter.Cache = function(options) {
-
- var data = {};
- var length = 0;
-
- function matchSubset(s, sub) {
- if (!options.matchCase)
- s = s.toLowerCase();
- var i = s.indexOf(sub);
- if (options.matchContains == "word"){
- i = s.toLowerCase().search("\\b" + sub.toLowerCase());
- }
- if (i == -1) return false;
- return i == 0 || options.matchContains;
- };
-
- function add(q, value) {
- if (length > options.cacheLength){
- flush();
- }
- if (!data[q]){
- length++;
- }
- data[q] = value;
- }
-
- function populate(){
- if( !options.data ) return false;
- // track the matches
- var stMatchSets = {},
- nullData = 0;
-
- // no url was specified, we need to adjust the cache length to make sure it fits the local data store
- if( !options.url ) options.cacheLength = 1;
-
- // track all options for minChars = 0
- stMatchSets[""] = [];
-
- // loop through the array and create a lookup structure
- for ( var i = 0, ol = options.data.length; i < ol; i++ ) {
- var rawValue = options.data[i];
- // if rawValue is a string, make an array otherwise just reference the array
- rawValue = (typeof rawValue == "string") ? [rawValue] : rawValue;
-
- var value = options.formatMatch(rawValue, i+1, options.data.length);
- if ( value === false )
- continue;
-
- var firstChar = value.charAt(0).toLowerCase();
- // if no lookup array for this character exists, look it up now
- if( !stMatchSets[firstChar] )
- stMatchSets[firstChar] = [];
-
- // if the match is a string
- var row = {
- value: value,
- data: rawValue,
- result: options.formatResult && options.formatResult(rawValue) || value
- };
-
- // push the current match into the set list
- stMatchSets[firstChar].push(row);
-
- // keep track of minChars zero items
- if ( nullData++ < options.max ) {
- stMatchSets[""].push(row);
- }
- };
-
- // add the data items to the cache
- $.each(stMatchSets, function(i, value) {
- // increase the cache size
- options.cacheLength++;
- // add to the cache
- add(i, value);
- });
- }
-
- // populate any existing data
- setTimeout(populate, 25);
-
- function flush(){
- data = {};
- length = 0;
- }
-
- return {
- flush: flush,
- add: add,
- populate: populate,
- load: function(q) {
- if (!options.cacheLength || !length)
- return null;
- /*
- * if dealing w/local data and matchContains than we must make sure
- * to loop through all the data collections looking for matches
- */
- if( !options.url && options.matchContains ){
- // track all matches
- var csub = [];
- // loop through all the data grids for matches
- for( var k in data ){
- // don't search through the stMatchSets[""] (minChars: 0) cache
- // this prevents duplicates
- if( k.length > 0 ){
- var c = data[k];
- $.each(c, function(i, x) {
- // if we've got a match, add it to the array
- if (matchSubset(x.value, q)) {
- csub.push(x);
- }
- });
- }
- }
- return csub;
- } else
- // if the exact item exists, use it
- if (data[q]){
- return data[q];
- } else
- if (options.matchSubset) {
- for (var i = q.length - 1; i >= options.minChars; i--) {
- var c = data[q.substr(0, i)];
- if (c) {
- var csub = [];
- $.each(c, function(i, x) {
- if (matchSubset(x.value, q)) {
- csub[csub.length] = x;
- }
- });
- return csub;
- }
- }
- }
- return null;
- }
- };
-};
-
-$.Autocompleter.Select = function (options, input, select, config) {
- var CLASSES = {
- ACTIVE: "ac_over"
- };
-
- var listItems,
- active = -1,
- data,
- term = "",
- needsInit = true,
- element,
- list;
-
- // Create results
- function init() {
- if (!needsInit)
- return;
- element = $("<div/>")
- .hide()
- .addClass(options.resultsClass)
- .css("position", "absolute")
- .appendTo(document.body);
-
- list = $("<ul/>").appendTo(element).mouseover( function(event) {
- if(target(event).nodeName && target(event).nodeName.toUpperCase() == 'LI') {
- active = $("li", list).removeClass(CLASSES.ACTIVE).index(target(event));
- $(target(event)).addClass(CLASSES.ACTIVE);
- }
- }).click(function(event) {
- $(target(event)).addClass(CLASSES.ACTIVE);
- select();
- // TODO provide option to avoid setting focus again after selection? useful for cleanup-on-focus
- input.focus();
- return false;
- }).mousedown(function() {
- config.mouseDownOnSelect = true;
- }).mouseup(function() {
- config.mouseDownOnSelect = false;
- });
-
- if( options.width > 0 )
- element.css("width", options.width);
-
- needsInit = false;
- }
-
- function target(event) {
- var element = event.target;
- while(element && element.tagName != "LI")
- element = element.parentNode;
- // more fun with IE, sometimes event.target is empty, just ignore it then
- if(!element)
- return [];
- return element;
- }
-
- function moveSelect(step) {
- listItems.slice(active, active + 1).removeClass(CLASSES.ACTIVE);
- movePosition(step);
- var activeItem = listItems.slice(active, active + 1).addClass(CLASSES.ACTIVE);
- if(options.scroll) {
- var offset = 0;
- listItems.slice(0, active).each(function() {
- offset += this.offsetHeight;
- });
- if((offset + activeItem[0].offsetHeight - list.scrollTop()) > list[0].clientHeight) {
- list.scrollTop(offset + activeItem[0].offsetHeight - list.innerHeight());
- } else if(offset < list.scrollTop()) {
- list.scrollTop(offset);
- }
- }
- };
-
- function movePosition(step) {
- active += step;
- if (active < 0) {
- active = listItems.size() - 1;
- } else if (active >= listItems.size()) {
- active = 0;
- }
- }
-
- function limitNumberOfItems(available) {
- return options.max && options.max < available
- ? options.max
- : available;
- }
-
- function fillList() {
- list.empty();
- var max = limitNumberOfItems(data.length);
- for (var i=0; i < max; i++) {
- if (!data[i])
- continue;
- var formatted = options.formatItem(data[i].data, i+1, max, data[i].value, term);
- if ( formatted === false )
- continue;
- var li = $("<li/>").html( options.highlight(formatted, term) ).addClass(i%2 == 0 ? "ac_even" : "ac_odd").appendTo(list)[0];
- $.data(li, "ac_data", data[i]);
- }
- listItems = list.find("li");
- if ( options.selectFirst ) {
- listItems.slice(0, 1).addClass(CLASSES.ACTIVE);
- active = 0;
- }
- // apply bgiframe if available
- if ( $.fn.bgiframe )
- list.bgiframe();
- }
-
- return {
- display: function(d, q) {
- init();
- data = d;
- term = q;
- fillList();
- },
- next: function() {
- moveSelect(1);
- },
- prev: function() {
- moveSelect(-1);
- },
- pageUp: function() {
- if (active != 0 && active - 8 < 0) {
- moveSelect( -active );
- } else {
- moveSelect(-8);
- }
- },
- pageDown: function() {
- if (active != listItems.size() - 1 && active + 8 > listItems.size()) {
- moveSelect( listItems.size() - 1 - active );
- } else {
- moveSelect(8);
- }
- },
- hide: function() {
- element && element.hide();
- listItems && listItems.removeClass(CLASSES.ACTIVE);
- active = -1;
- },
- visible : function() {
- return element && element.is(":visible");
- },
- current: function() {
- return this.visible() && (listItems.filter("." + CLASSES.ACTIVE)[0] || options.selectFirst && listItems[0]);
- },
- show: function() {
- var offset = $(input).offset();
- element.css({
- width: typeof options.width == "string" || options.width > 0 ? options.width : $(input).width(),
- top: offset.top + input.offsetHeight,
- left: offset.left
- }).show();
- if(options.scroll) {
- list.scrollTop(0);
- list.css({
- maxHeight: options.scrollHeight,
- overflow: 'auto'
- });
-
- if($.browser.msie && typeof document.body.style.maxHeight === "undefined") {
- var listHeight = 0;
- listItems.each(function() {
- listHeight += this.offsetHeight;
- });
- var scrollbarsVisible = listHeight > options.scrollHeight;
- list.css('height', scrollbarsVisible ? options.scrollHeight : listHeight );
- if (!scrollbarsVisible) {
- // IE doesn't recalculate width when scrollbar disappears
- listItems.width( list.width() - parseInt(listItems.css("padding-left")) - parseInt(listItems.css("padding-right")) );
- }
- }
-
- }
- },
- selected: function() {
- var selected = listItems && listItems.filter("." + CLASSES.ACTIVE).removeClass(CLASSES.ACTIVE);
- return selected && selected.length && $.data(selected[0], "ac_data");
- },
- emptyList: function (){
- list && list.empty();
- },
- unbind: function() {
- element && element.remove();
- }
- };
-};
-
-$.Autocompleter.Selection = function(field, start, end) {
- if( field.createTextRange ){
- var selRange = field.createTextRange();
- selRange.collapse(true);
- selRange.moveStart("character", start);
- selRange.moveEnd("character", end);
- selRange.select();
- } else if( field.setSelectionRange ){
- field.setSelectionRange(start, end);
- } else {
- if( field.selectionStart ){
- field.selectionStart = start;
- field.selectionEnd = end;
- }
- }
- field.focus();
-};
-
-})(jQuery);
\ No newline at end of file
+++ /dev/null
-#**
- * Overall HTML page layout
- *#
-
-<html>
-<head>
- #parse("head.vm")
-</head>
- <body>
- <div id="admin"><a href="#url_root/#/#core_name">Solr Admin</a></div>
- <div id="header">
- #parse("header.vm")
- </div>
- <div id="tabs">
- #parse("tabs.vm")
- </div>
- <div id="content">
- $content
- </div>
- <div id="footer">
- #parse("footer.vm")
- </div>
- </body>
-</html>
+++ /dev/null
-#admin{
- text-align: right;
- vertical-align: top;
-}
-
-#head{
- width: 100%;
-}
-.array-field {
- border: 2px solid #474747;
- background: #FFE9D8;
- padding: 5px;
- margin: 5px;
-}
-
-.array-field-list li {
- list-style: circle;
- margin-left: 20px;
-}
-
-.parsed_query_header {
- font-family: Helvetica, Arial, sans-serif;
- font-size: 10pt;
- font-weight: bold;
-}
-
-.parsed_query {
- font-family: Courier, Courier New, monospaced;
- font-size: 10pt;
- font-weight: normal;
-}
-
-body {
- font-family: Helvetica, Arial, sans-serif;
- font-size: 10pt;
-}
-
-a {
- color: #43a4b1;
-}
-
-.navigators {
- float: left;
- margin: 5px;
- margin-top: 0px;
- width: 185px;
- padding: 5px;
- top: -20px;
- position: relative;
-}
-
-.tabs-bar {
- padding: 5px;
- width: 100%;
- border: 1px solid;
- border-width: 0px 0px 1px 0px;
-}
-.tab {
- font-weight: bold;
- padding: 5px;
- margin: 0px 5px;
- border: 1px solid;
- background-color: #dddddd;
- border-top-left-radius: 4px;
- border-top-right-radius: 4px;
-}
-.tab:hover {
- background: #FEC293;
-}
-.tab.selected {
- background-color: #ffffff;
- border-bottom: 1px solid #ffffff;
-}
-
-.navigators h2 {
- background: #FEC293;
- padding: 2px 5px;
-}
-
-.navigators ul {
- list-style: none;
- margin: 0;
- margin-bottom: 5px;
- margin-top: 5px;
- padding-left: 10px;
-}
-
-.navigators ul li {
- color: #999;
- padding: 2px;
-}
-
-
-
-.facet-field {
- font-weight: bold;
-}
-
-.highlight {
- color: white;
- background-color: gray;
- border: 1px black solid;
-}
-
-.highlight-box {
- margin-left: 15px;
-}
-
-.field-name {
- font-weight: bold;
-}
-
-.highlighted-facet-field {
- background: white;
-}
-
-.constraints {
- margin-top: 10px;
-}
-
-#query-form{
- width: 80%;
-}
-
-
-
-.query-box, .constraints {
- padding: 5px;
- margin: 5px;
- font-weight: normal;
- font-size: 24px;
- letter-spacing: 0.08em;
-}
-
-.query-box #q {
- margin-left: 8px;
- width: 60%;
- height: 50px;
- border: 1px solid #999;
- font-size: 1em;
- padding: 0.4em;
-}
-
-.query-box {
-
-}
-
-.query-boost {
-
- top: 10px;
- left: 50px;
- position: relative;
- font-size: 0.8em;
-}
-
-.query-box .inputs{
- left: 180px;
- position: relative;
-
-}
-
-#logo {
- margin: 10px;
- border-style: none;
-}
-
-.pagination {
- padding-left: 33%;
- background: #eee;
- margin: 5px;
- margin-left: 210px;
- padding-top: 5px;
- padding-bottom: 5px;
-}
-
-.result-document {
- border: 1px solid #999;
- padding: 5px;
- margin: 5px;
- margin-left: 210px;
- margin-bottom: 15px;
-}
-
-.result-document div{
- padding: 5px;
-}
-
-.result-title{
- width:60%;
-}
-
-.result-body{
- background: #ddd;
-}
-
-.mlt{
-
-}
-
-.map{
- float: right;
- position: relative;
- top: -25px;
-}
-
-.result-document:nth-child(2n+1) {
- background-color: #eee;
-}
-
-
-.selected-facet-field {
- font-weight: bold;
-}
-
-li.show {
- list-style: disc;
-}
-
-.group-value{
- font-weight: bold;
-}
-
-.error {
- color: white;
- background-color: red;
- left: 210px;
- width:80%;
- position: relative;
-
-}
+++ /dev/null
-#**
- * Define some Mime-Types, short and long form
- *#
-
-## MimeType to extension map for detecting file type
-## and showing proper icon
-## List of types match the icons in /solr/img/filetypes
-
-## Short MimeType Names
-## Was called $supportedtypes
-#set($supportedMimeTypes = "7z;ai;aiff;asc;audio;bin;bz2;c;cfc;cfm;chm;class;conf;cpp;cs;css;csv;deb;divx;doc;dot;eml;enc;file;gif;gz;hlp;htm;html;image;iso;jar;java;jpeg;jpg;js;lua;m;mm;mov;mp3;mpg;odc;odf;odg;odi;odp;ods;odt;ogg;pdf;pgp;php;pl;png;ppt;ps;py;ram;rar;rb;rm;rpm;rtf;sig;sql;swf;sxc;sxd;sxi;sxw;tar;tex;tgz;txt;vcf;video;vsd;wav;wma;wmv;xls;xml;xpi;xvid;zip")
-
-## Long Form: map MimeType headers to our Short names
-## Was called $extMap
-#set( $mimeExtensionsMap = {
- "application/x-7z-compressed": "7z",
- "application/postscript": "ai",
- "application/pgp-signature": "asc",
- "application/octet-stream": "bin",
- "application/x-bzip2": "bz2",
- "text/x-c": "c",
- "application/vnd.ms-htmlhelp": "chm",
- "application/java-vm": "class",
- "text/css": "css",
- "text/csv": "csv",
- "application/x-debian-package": "deb",
- "application/msword": "doc",
- "message/rfc822": "eml",
- "image/gif": "gif",
- "application/winhlp": "hlp",
- "text/html": "html",
- "application/java-archive": "jar",
- "text/x-java-source": "java",
- "image/jpeg": "jpeg",
- "application/javascript": "js",
- "application/vnd.oasis.opendocument.chart": "odc",
- "application/vnd.oasis.opendocument.formula": "odf",
- "application/vnd.oasis.opendocument.graphics": "odg",
- "application/vnd.oasis.opendocument.image": "odi",
- "application/vnd.oasis.opendocument.presentation": "odp",
- "application/vnd.oasis.opendocument.spreadsheet": "ods",
- "application/vnd.oasis.opendocument.text": "odt",
- "application/pdf": "pdf",
- "application/pgp-encrypted": "pgp",
- "image/png": "png",
- "application/vnd.ms-powerpoint": "ppt",
- "audio/x-pn-realaudio": "ram",
- "application/x-rar-compressed": "rar",
- "application/vnd.rn-realmedia": "rm",
- "application/rtf": "rtf",
- "application/x-shockwave-flash": "swf",
- "application/vnd.sun.xml.calc": "sxc",
- "application/vnd.sun.xml.draw": "sxd",
- "application/vnd.sun.xml.impress": "sxi",
- "application/vnd.sun.xml.writer": "sxw",
- "application/x-tar": "tar",
- "application/x-tex": "tex",
- "text/plain": "txt",
- "text/x-vcard": "vcf",
- "application/vnd.visio": "vsd",
- "audio/x-wav": "wav",
- "audio/x-ms-wma": "wma",
- "video/x-ms-wmv": "wmv",
- "application/vnd.ms-excel": "xls",
- "application/xml": "xml",
- "application/x-xpinstall": "xpi",
- "application/zip": "zip"
-})
+++ /dev/null
-#**
- * Paging and Statistics at bottom of results
- *#
-
-## Usually rendered in pagination div tag
-
-#if($response.response.get('grouped'))
- ## pass
-#else
-
- #link_to_previous_page("previous")
-
- <span class="results-found">$page.results_found</span>
- results found.
-
- Page <span class="page-num">$page.current_page_number</span>
- of <span class="page-count">$page.page_count</span>
-
- #link_to_next_page("next")
-
-#end
-<br/>
+++ /dev/null
-#**
- * Paging and Statistics at top of results
- *#
-
-## Usually rendered in pagination div tag
-
-## Grouped Results / Not Paginated
-#if($response.response.get('grouped'))
-
- <span>
- <span class="results-found">
- $response.response.get('grouped').size() group(s)
- </span>
- found in ${response.responseHeader.QTime} ms
- </span>
-
-## Regular Results / Use Paging Links if needed
-#else
-
- <span>
- <span class="results-found">$page.results_found</span>
- results found in
- ${response.responseHeader.QTime} ms
- </span>
-
- Page <span class="page-num">$page.current_page_number</span>
- of <span class="page-count">$page.page_count</span>
-
-#end ## end else non-grouped results, normal pagination
+++ /dev/null
-#**
- * Render a hit representing a Product
- * assumed to have a field called "name"
- *#
-
-<div class="result-title"><b>#field('name')</b><span class="mlt"> #if($params.getBool('mlt', false) == false)<a href="#lensNoQ&q=id:$docId&mlt=true">More Like This</a>#end</span></div>
-##do we have a physical store for this product
-#set($store = $doc.getFieldValue('store'))
-#if($store)<div class="map"><img src="http://maps.google.com/maps/api/staticmap?&zoom=12&size=150x80&maptype=roadmap&markers=$doc.getFieldValue('store')&sensor=false" /><div><small><a target="_map" href="http://maps.google.com/?q=$store&source=embed">Larger Map</a></small></div></div>#end
-<div>Id: #field('id')</div>
-<div>Price: #field('price_c')</div>
-<div>Features: #field('features')</div>
-<div>In Stock: #field('inStock')</div>
-<div class="mlt">
- #set($mlt = $mltResults.get($docId))
- #set($mltOn = $params.getBool('mlt'))
- #if($mltOn == true)<div class="field-name">Similar Items</div>#end
- #if ($mltOn && $mlt && $mlt.size() > 0)
- <ul>
- #foreach($mltHit in $mlt)
- #set($mltId = $mltHit.getFieldValue('id'))
- <li><div><a href="#url_for_home?q=id:$mltId">$mltId</a></div><div><span class="field-name">Name:</span> $mltHit.getFieldValue('name')</div>
- <div><span class="field-name">Price:</span> $!number.currency($mltHit.getFieldValue('price')) <span class="field-name">In Stock:</span> $mltHit.getFieldValue('inStock')</div>
-
- </li>
- #end
- </ul>
- #elseif($mltOn && $mlt.size() == 0)
- <div>No Similar Items Found</div>
- #end
-</div>
-#parse('debug.vm')
+++ /dev/null
-<div class="query-box">
- <form id="query-form" action="#{url_for_home}" method="GET">
- <div class="inputs">
- <span #annTitle("Add the query using the &q= parameter")>Find: <input type="text" id="q" name="q" value="$!esc.html($params.get('q'))"/> <input type="submit" id="querySubmit"/> <input type="reset"/></span>
- <div class="query-boost"><span #annTitle("Add the boost function &bf=price to the query")><input type="checkbox" name="bf" value="price" #if($request.params.get('bf') == 'price')checked="true"#end>Boost by Price</input></span>
- #parse("querySpatial.vm")
- #parse("queryGroup.vm")
- </div>
- </div>
-
- #if($request.params.get('debugQuery'))
- <input type="hidden" name="debugQuery" value="true"/>
- #end
- #if($annotate == true)
- <input type="hidden" name="annotateBrowse" value="true"/>
- #end
- #foreach($fq in $request.params.getParams('fq'))
- #if ($fq != "{!bbox}")
- <input type="hidden" name="fq" id="allFQs" value="$esc.html($fq)"/>
- #end
- #end
- <div class="constraints" #annTitle("Lists out the &fq filters. Click to remove.")>
- #foreach($fq in $params.getParams('fq'))
- #set($previous_fq_count=$velocityCount - 1)
- #if($fq != '')
- > <a style="{text-decoration: line-through;}" href="#url_for_filters($request.params.getParams('fq').subList(0,$previous_fq_count))">$fq</a>
- #end
- #end
- </div>
- <div class="parsed_query_header">
- #if($request.params.get('debugQuery'))
- <a href="#" onclick='jQuery(this).siblings("div").toggle(); return false;'>toggle parsed query</a>
- <div class="parsed_query" style="display:none">$response.response.debug.parsedquery</div>
- #end
- #set($queryOpts = $request.params.get("queryOpts"))
- #if($queryOpts && $queryOpts != "")
- <input type="hidden" name="queryOpts" value="$queryOpts"/>
- #end
- </div>
- </form>
-
-</div>
+++ /dev/null
-#**
- * Renders the main query form
- *#
-
-<div class="query-box">
- <form id="query-form" action="#{url_for_home}" method="GET">
-
- <div class="inputs">
- <span #annTitle("Add the query using the &q= parameter")>
- Find:
- <input type="text" id="q" name="q" value="$!esc.html($params.get('q'))"/>
- <input type="submit" id="querySubmit"/>
- <input type="reset"/>
- </span>
- <div class="query-boost">
- <span #annTitle("Add the boost function &bf=price to the query")>
- <input type="checkbox" name="bf" value="price"
- #if($request.params.get('bf') == 'price')checked="true"#end
- >
- Boost by Price
- </input>
- </span>
- #parse("query_spatial.vm")
- #parse("query_group.vm")
- </div>
- </div>
-
- #if($request.params.get('debugQuery'))
- <input type="hidden" name="debugQuery" value="true"/>
- #end
- #if($annotate == true)
- <input type="hidden" name="annotateBrowse" value="true"/>
- #end
- #foreach($fq in $request.params.getParams('fq'))
- #if ($fq != "{!bbox}")
- <input type="hidden" name="fq" id="allFQs" value="$esc.html($fq)"/>
- #end
- #end
-
- <div class="constraints" #annTitle("Lists out the &fq filters. Click to remove.")>
- #foreach($fq in $params.getParams('fq'))
- #set($previous_fq_count=$velocityCount - 1)
- #if($fq != '')
- >
- <a style="{text-decoration: line-through;}"
- href="#url_for_filters($request.params.getParams('fq').subList(0,$previous_fq_count))"
- >$fq</a>
- #end
- #end
- </div>
-
- <div class="parsed_query_header">
- #if($request.params.get('debugQuery'))
- <a href="#" onclick='jQuery(this).siblings("div").toggle(); return false;'>toggle parsed query</a>
- <div class="parsed_query" style="display:none">$response.response.debug.parsedquery</div>
- #end
- #set($queryOpts = $request.params.get("queryOpts"))
- #if($queryOpts && $queryOpts != "")
- <input type="hidden" name="queryOpts" value="$queryOpts"/>
- #end
- </div>
-
- </form>
-</div>
+++ /dev/null
-#**
- * Query settings for grouping by fields,
- * e.g.: Manufacturer or Popularity
- *#
-
-#set($queryOpts = $params.get("queryOpts"))
-
-#if($queryOpts == "group")
- <div>
- #set($groupF = $request.params.get('group.field'))
-
- <label #annTitle("Add the &group.field parameter. Multiselect is supported")>
- Group By:
- <select id="group" name="group.field" multiple="true">
- ## TODO: Handle multiple selects correctly
- ## TODO: fix empty / "No Group" selection
-
- <option value=""
- #if($groupF == '')selected="true"#end
- >
- No Group
- </option>
-
- <option value="manu_exact"
- #if($groupF == 'manu_exact')selected="true"#end
- >
- Manufacturer
- </option>
-
- <option value="popularity"
- #if($groupF == 'popularity')selected="true"#end
- >
- Popularity
- </option>
-
- </select>
- </label>
-
- <input type="hidden" name="group" value="true"/>
-
- </div>
-
-#end
+++ /dev/null
-#**
- * Query logic for selecting location / Geospatial search
- *#
-
-#set($queryOpts = $params.get("queryOpts"))
-
-#if($queryOpts == "spatial")
-
- <div>
-
- #set($loc = $request.params.get('pt'))
- ## Normalize first trip through to "none" because
- ## an empty string generates an error message later on
- #if( ! $loc )
- #set( $loc = "none" )
- #end
-
- #set($dist = $request.params.get('d', "10"))
-
- ## Cities for The Select List
- #set( $cities = {
- "none": "No Filter",
- "45.17614,-93.87341": "Buffalo, MN",
- "37.7752,-100.0232": "Dodge City, KS",
- "35.0752,-97.032": "Oklahoma City, OK",
- "37.7752,-122.4232": "San Francisco CA"
- })
-
- <label #annTitle("Add the &pt parameter")>
- Location Filter:
- <select id="pt" name="pt">
-
- ## Generate <option> tag for each city
- #foreach( $city_lon_lat in $cities.keySet() )
- #set( $city_name = $cities.get($city_lon_lat) )
- <option value="$city_lon_lat"
- #if($loc == $city_lon_lat)selected="true"#end
- >
- $city_name
- </option>
- #end
-
- </select>
-
- </label>
-
- <span #annTitle("Add the &d parameter")>
- Distance (KM):
- <input id="d" name="d" type="text" size="6"
- value="#if($dist != '')${dist}#{else}10#end" ## TODO: isn't the default of 10 above sufficient? no if/else needed?
- />
- </span>
-
- <input type="hidden" name="sfield" value="store"/>
- <input type="hidden" id="spatialFQ" name="fq" value=""/>
- <input type="hidden" name="queryOpts" value="spatial"/>
-
- </div>
-
- <script type="text/javascript">
- $('#query-form').submit(function() {
- if ($("#pt").val() != "none") {
- $("#spatialFQ").val("{!bbox}");
- }
- $fqs = $("#allFQs").val();
- $fqs = $fqs.replace("{!bbox}", "");
- if ($fqs == ''){
- $("#allFQs").remove();
- }
- $("#allFQs").val($fqs);
- return true;
- });
- </script>
-
-#end
+++ /dev/null
-#**
- * Render the main Results List
- *#
-
-## Usually displayed inside <div class="results">
-
-#if($response.response.get('grouped'))
-
- #foreach($grouping in $response.response.get('grouped'))
- #parse("hit_grouped.vm")
- #end
-
-#else
-
- #foreach($doc in $response.results)
- #parse("hit.vm")
- ## Can get an extremely simple view of the doc
- ## which might be nicer for debugging
- ##parse("hit_plain.vm")
- #end
-
-#end
+++ /dev/null
-#**
- * Render a complex document in the results list
- *#
-
-## Load Mime-Type List and Mapping
-#parse('mime_type_lists.vm')
-## Sets:
-## * supportedMimeTypes, AKA supportedtypes
-## * mimeExtensionsMap, AKA extMap
-
-## Title
-#if($doc.getFieldValue('title'))
- #set($title = $esc.html($doc.getFirstValue('title')))
-#else
- #set($title = "["+$doc.getFieldValue('id')+"]")
-#end
-
-## URL
-#if($doc.getFieldValue('url'))
- #set($url = $doc.getFieldValue('url'))
-#elseif($doc.getFieldValue('resourcename'))
- #set($url = "file:///$doc.getFieldValue('resourcename')")
-#else
- #set($url = "$doc.getFieldValue('id')")
-#end
-
-## Sort out Mime-Type
-#set($ct = $list.get($doc.getFirstValue('content_type').split(";"),0))
-#set($filename = $doc.getFieldValue('resourcename'))
-#set($filetype = false)
-#set($filetype = $mimeExtensionsMap.get($ct))
-
-## TODO: falling back to file extension is convenient,
-## except when you don't have an icon for that extension
-## example "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
-## document with a .docx extension.
-## It'd be nice to fall back to an "unknown" or the existing "file" type
-## We sort of do this below, but only if the filename has no extension
-## (anything after the last dot).
-
-#if(!$filetype)
- #set($filetype = $filename.substring($filename.lastIndexOf(".")).substring(1))
-#end
-
-## #if(!$filetype)
-## #set($filetype = "file")
-## #end
-## #if(!$supportedMimeTypes.contains($filetype))
-## #set($filetype = "file")
-## #end
-
-## Row 1: Icon and Title and mlt link
-<div class="result-title">
- ## Icon
- ## Small file type icons from http://www.splitbrain.org/projects/file_icons (public domain)
- <img src="#{url_root}/img/filetypes/${filetype}.png" align="center">
-
- ## Title, hyperlinked
- <a href="${url}" target="_blank">
- <b>$title</b></a>
-
- ## Link for MLT / More Like This / Find Similar
- <span class="mlt">
- #if($params.getBool('mlt', false) == false)
- <a href="#lensNoQ&q=id:%22$docId%22&mlt=true">
- More Like This</a>
- #end
- </span>
-
-</div>
-
-## Row 2?: ID / URL
-<div>
- Id: #field('id')
-</div>
-
-## Resource Name
-<div>
- #if($doc.getFieldValue('resourcename'))
- Resource name: $filename
- #elseif($url)
- URL: $url
- #end
- #if($ct)
- ($ct)
- #end
-</div>
-
-## Author
-#if($doc.getFieldValue('author'))
- <div>
- Author: #field('author')
- </div>
-#end
-
-## Last_Modified Date
-#if($doc.getFieldValue('last_modified'))
- <div>
- last-modified:
- #field('last_modified')
- </div>
-#end
-
-## Main content of doc
-<div class="result-body">
- #field('content')
-</div>
-
-## Display Similar Documents / MLT = More Like This
-<div class="mlt">
- #set($mlt = $mltResults.get($docId))
- #set($mltOn = $params.getBool('mlt'))
- #if($mltOn == true)
- <div class="field-name">
- Similar Items
- </div>
- #end
- ## If has MLT enabled An Entries to show
- #if ($mltOn && $mlt && $mlt.size() > 0)
- <ul>
- #foreach($mltHit in $mlt)
- #set($mltId = $mltHit.getFieldValue('id'))
- <li>
- <div>
- <a href="#url_for_home?q=id:$mltId">
- $mltId</a>
- </div>
- <div>
- <span class="field-name">
- Title:
- </span>
- $mltHit.getFieldValue('title')
- </div>
- <div>
- <span class="field-name">
- Author:
- </span>
- $mltHit.getFieldValue('author')
- <span class="field-name">
- Description:
- </span>
- $mltHit.getFieldValue('description')
- </div>
- </li>
- #end ## end for each mltHit in $mlt
- </ul>
- ## Else MLT Enabled but no mlt results for this query
- #elseif($mltOn && $mlt.size() == 0)
- <div>No Similar Items Found</div>
- #end
-</div> ## div class=mlt
-
-#parse('debug.vm')
+++ /dev/null
-#**
- * Provides cynamic spelling suggestions
- * as you type in the search form
- *#
-
-#foreach($t in $response.response.terms.name)
- $t.key
-#end
+++ /dev/null
-#**
- * Provides navigation/access to Advanced search options
- * Usually displayed near the top of the page
- *#
-
-##TODO: Make some nice tabs here
-
-#set($queryOpts = $params.get("queryOpts"))
-
-<div class="tabs-bar" #annTitle("Click the link to demonstrate various Solr capabilities")>
-
- <span>Type of Search:</span>
-
- ##queryOpts=$queryOpts
-
- ## return to Simple Search
- ##set( $selected = ($queryOpts && $queryOpts != "") )
- #set( $selected = ! $queryOpts )
- <span class="tab #if($selected)selected#end">
- #if($selected)
- Simple
- #else
- <a href="#url_for_home/?#debug#annotate">
- Simple</a>
- #end
- </span>
-
- ## GEO-Spatial / Location Based
- #set( $selected = ($queryOpts == "spatial") )
- <span class="tab #if($selected)selected#end">
- #if($selected)
- Spatial
- #else
- <a href="#url_for_home?&queryOpts=spatial#debug#annotate">
- Spatial</a>
- #end
- </span>
-
- ## Group By Field
- #set( $selected = ($queryOpts == "group") )
- <span class="tab #if($selected)selected#end">
- #if($selected)
- Group By
- #else
- <a href="#url_for_home?#debug#annotate&queryOpts=group&group=true&group.field=manu_exact">
- Group By</a>
- #end
- </span>
-
-</div>
+++ /dev/null
-<?xml version='1.0' encoding='UTF-8'?>
-
-<!--
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- -->
-
-<!--
- Simple transform of Solr query results to HTML
- -->
-<xsl:stylesheet version='1.0'
- xmlns:xsl='http://www.w3.org/1999/XSL/Transform'
->
-
- <xsl:output media-type="text/html" encoding="UTF-8"/>
-
- <xsl:variable name="title" select="concat('Solr search results (',response/result/@numFound,' documents)')"/>
-
- <xsl:template match='/'>
- <html>
- <head>
- <title><xsl:value-of select="$title"/></title>
- <xsl:call-template name="css"/>
- </head>
- <body>
- <h1><xsl:value-of select="$title"/></h1>
- <div class="note">
- This has been formatted by the sample "example.xsl" transform -
- use your own XSLT to get a nicer page
- </div>
- <xsl:apply-templates select="response/result/doc"/>
- </body>
- </html>
- </xsl:template>
-
- <xsl:template match="doc">
- <xsl:variable name="pos" select="position()"/>
- <div class="doc">
- <table width="100%">
- <xsl:apply-templates>
- <xsl:with-param name="pos"><xsl:value-of select="$pos"/></xsl:with-param>
- </xsl:apply-templates>
- </table>
- </div>
- </xsl:template>
-
- <xsl:template match="doc/*[@name='score']" priority="100">
- <xsl:param name="pos"></xsl:param>
- <tr>
- <td class="name">
- <xsl:value-of select="@name"/>
- </td>
- <td class="value">
- <xsl:value-of select="."/>
-
- <xsl:if test="boolean(//lst[@name='explain'])">
- <xsl:element name="a">
- <!-- can't allow whitespace here -->
- <xsl:attribute name="href">javascript:toggle("<xsl:value-of select="concat('exp-',$pos)" />");</xsl:attribute>?</xsl:element>
- <br/>
- <xsl:element name="div">
- <xsl:attribute name="class">exp</xsl:attribute>
- <xsl:attribute name="id">
- <xsl:value-of select="concat('exp-',$pos)" />
- </xsl:attribute>
- <xsl:value-of select="//lst[@name='explain']/str[position()=$pos]"/>
- </xsl:element>
- </xsl:if>
- </td>
- </tr>
- </xsl:template>
-
- <xsl:template match="doc/arr" priority="100">
- <tr>
- <td class="name">
- <xsl:value-of select="@name"/>
- </td>
- <td class="value">
- <ul>
- <xsl:for-each select="*">
- <li><xsl:value-of select="."/></li>
- </xsl:for-each>
- </ul>
- </td>
- </tr>
- </xsl:template>
-
-
- <xsl:template match="doc/*">
- <tr>
- <td class="name">
- <xsl:value-of select="@name"/>
- </td>
- <td class="value">
- <xsl:value-of select="."/>
- </td>
- </tr>
- </xsl:template>
-
- <xsl:template match="*"/>
-
- <xsl:template name="css">
- <script>
- function toggle(id) {
- var obj = document.getElementById(id);
- obj.style.display = (obj.style.display != 'block') ? 'block' : 'none';
- }
- </script>
- <style type="text/css">
- body { font-family: "Lucida Grande", sans-serif }
- td.name { font-style: italic; font-size:80%; }
- td { vertical-align: top; }
- ul { margin: 0px; margin-left: 1em; padding: 0px; }
- .note { font-size:80%; }
- .doc { margin-top: 1em; border-top: solid grey 1px; }
- .exp { display: none; font-family: monospace; white-space: pre; }
- </style>
- </xsl:template>
-
-</xsl:stylesheet>
+++ /dev/null
-<?xml version='1.0' encoding='UTF-8'?>
-
-<!--
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- -->
-
-<!--
- Simple transform of Solr query results to Atom
- -->
-
-<xsl:stylesheet version='1.0'
- xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>
-
- <xsl:output
- method="xml"
- encoding="utf-8"
- media-type="application/xml"
- />
-
- <xsl:template match='/'>
- <xsl:variable name="query" select="response/lst[@name='responseHeader']/lst[@name='params']/str[@name='q']"/>
- <feed xmlns="http://www.w3.org/2005/Atom">
- <title>Example Solr Atom 1.0 Feed</title>
- <subtitle>
- This has been formatted by the sample "example_atom.xsl" transform -
- use your own XSLT to get a nicer Atom feed.
- </subtitle>
- <author>
- <name>Apache Solr</name>
- <email>solr-user@lucene.apache.org</email>
- </author>
- <link rel="self" type="application/atom+xml"
- href="http://localhost:8983/solr/q={$query}&wt=xslt&tr=atom.xsl"/>
- <updated>
- <xsl:value-of select="response/result/doc[position()=1]/date[@name='timestamp']"/>
- </updated>
- <id>tag:localhost,2007:example</id>
- <xsl:apply-templates select="response/result/doc"/>
- </feed>
- </xsl:template>
-
- <!-- search results xslt -->
- <xsl:template match="doc">
- <xsl:variable name="id" select="str[@name='id']"/>
- <entry>
- <title><xsl:value-of select="str[@name='name']"/></title>
- <link href="http://localhost:8983/solr/select?q={$id}"/>
- <id>tag:localhost,2007:<xsl:value-of select="$id"/></id>
- <summary><xsl:value-of select="arr[@name='features']"/></summary>
- <updated><xsl:value-of select="date[@name='timestamp']"/></updated>
- </entry>
- </xsl:template>
-
-</xsl:stylesheet>
+++ /dev/null
-<?xml version='1.0' encoding='UTF-8'?>
-
-<!--
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- -->
-
-<!--
- Simple transform of Solr query results to RSS
- -->
-
-<xsl:stylesheet version='1.0'
- xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>
-
- <xsl:output
- method="xml"
- encoding="utf-8"
- media-type="application/xml"
- />
- <xsl:template match='/'>
- <rss version="2.0">
- <channel>
- <title>Example Solr RSS 2.0 Feed</title>
- <link>http://localhost:8983/solr</link>
- <description>
- This has been formatted by the sample "example_rss.xsl" transform -
- use your own XSLT to get a nicer RSS feed.
- </description>
- <language>en-us</language>
- <docs>http://localhost:8983/solr</docs>
- <xsl:apply-templates select="response/result/doc"/>
- </channel>
- </rss>
- </xsl:template>
-
- <!-- search results xslt -->
- <xsl:template match="doc">
- <xsl:variable name="id" select="str[@name='id']"/>
- <xsl:variable name="timestamp" select="date[@name='timestamp']"/>
- <item>
- <title><xsl:value-of select="str[@name='name']"/></title>
- <link>
- http://localhost:8983/solr/select?q=id:<xsl:value-of select="$id"/>
- </link>
- <description>
- <xsl:value-of select="arr[@name='features']"/>
- </description>
- <pubDate><xsl:value-of select="$timestamp"/></pubDate>
- <guid>
- http://localhost:8983/solr/select?q=id:<xsl:value-of select="$id"/>
- </guid>
- </item>
- </xsl:template>
-</xsl:stylesheet>
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-
-<!--
- Display the luke request handler with graphs
- -->
-<xsl:stylesheet
- xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- xmlns="http://www.w3.org/1999/xhtml"
- version="1.0"
- >
- <xsl:output
- method="html"
- encoding="UTF-8"
- media-type="text/html"
- doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
- doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
- />
-
- <xsl:variable name="title">Solr Luke Request Handler Response</xsl:variable>
-
- <xsl:template match="/">
- <html xmlns="http://www.w3.org/1999/xhtml">
- <head>
- <link rel="stylesheet" type="text/css" href="solr-admin.css"/>
- <link rel="icon" href="favicon.ico" type="image/ico"/>
- <link rel="shortcut icon" href="favicon.ico" type="image/ico"/>
- <title>
- <xsl:value-of select="$title"/>
- </title>
- <xsl:call-template name="css"/>
-
- </head>
- <body>
- <h1>
- <xsl:value-of select="$title"/>
- </h1>
- <div class="doc">
- <ul>
- <xsl:if test="response/lst[@name='index']">
- <li>
- <a href="#index">Index Statistics</a>
- </li>
- </xsl:if>
- <xsl:if test="response/lst[@name='fields']">
- <li>
- <a href="#fields">Field Statistics</a>
- <ul>
- <xsl:for-each select="response/lst[@name='fields']/lst">
- <li>
- <a href="#{@name}">
- <xsl:value-of select="@name"/>
- </a>
- </li>
- </xsl:for-each>
- </ul>
- </li>
- </xsl:if>
- <xsl:if test="response/lst[@name='doc']">
- <li>
- <a href="#doc">Document statistics</a>
- </li>
- </xsl:if>
- </ul>
- </div>
- <xsl:if test="response/lst[@name='index']">
- <h2><a name="index"/>Index Statistics</h2>
- <xsl:apply-templates select="response/lst[@name='index']"/>
- </xsl:if>
- <xsl:if test="response/lst[@name='fields']">
- <h2><a name="fields"/>Field Statistics</h2>
- <xsl:apply-templates select="response/lst[@name='fields']"/>
- </xsl:if>
- <xsl:if test="response/lst[@name='doc']">
- <h2><a name="doc"/>Document statistics</h2>
- <xsl:apply-templates select="response/lst[@name='doc']"/>
- </xsl:if>
- </body>
- </html>
- </xsl:template>
-
- <xsl:template match="lst">
- <xsl:if test="parent::lst">
- <tr>
- <td colspan="2">
- <div class="doc">
- <xsl:call-template name="list"/>
- </div>
- </td>
- </tr>
- </xsl:if>
- <xsl:if test="not(parent::lst)">
- <div class="doc">
- <xsl:call-template name="list"/>
- </div>
- </xsl:if>
- </xsl:template>
-
- <xsl:template name="list">
- <xsl:if test="count(child::*)>0">
- <table>
- <thead>
- <tr>
- <th colspan="2">
- <p>
- <a name="{@name}"/>
- </p>
- <xsl:value-of select="@name"/>
- </th>
- </tr>
- </thead>
- <tbody>
- <xsl:choose>
- <xsl:when
- test="@name='histogram'">
- <tr>
- <td colspan="2">
- <xsl:call-template name="histogram"/>
- </td>
- </tr>
- </xsl:when>
- <xsl:otherwise>
- <xsl:apply-templates/>
- </xsl:otherwise>
- </xsl:choose>
- </tbody>
- </table>
- </xsl:if>
- </xsl:template>
-
- <xsl:template name="histogram">
- <div class="doc">
- <xsl:call-template name="barchart">
- <xsl:with-param name="max_bar_width">50</xsl:with-param>
- <xsl:with-param name="iwidth">800</xsl:with-param>
- <xsl:with-param name="iheight">160</xsl:with-param>
- <xsl:with-param name="fill">blue</xsl:with-param>
- </xsl:call-template>
- </div>
- </xsl:template>
-
- <xsl:template name="barchart">
- <xsl:param name="max_bar_width"/>
- <xsl:param name="iwidth"/>
- <xsl:param name="iheight"/>
- <xsl:param name="fill"/>
- <xsl:variable name="max">
- <xsl:for-each select="int">
- <xsl:sort data-type="number" order="descending"/>
- <xsl:if test="position()=1">
- <xsl:value-of select="."/>
- </xsl:if>
- </xsl:for-each>
- </xsl:variable>
- <xsl:variable name="bars">
- <xsl:value-of select="count(int)"/>
- </xsl:variable>
- <xsl:variable name="bar_width">
- <xsl:choose>
- <xsl:when test="$max_bar_width < ($iwidth div $bars)">
- <xsl:value-of select="$max_bar_width"/>
- </xsl:when>
- <xsl:otherwise>
- <xsl:value-of select="$iwidth div $bars"/>
- </xsl:otherwise>
- </xsl:choose>
- </xsl:variable>
- <table class="histogram">
- <tbody>
- <tr>
- <xsl:for-each select="int">
- <td>
- <xsl:value-of select="."/>
- <div class="histogram">
- <xsl:attribute name="style">background-color: <xsl:value-of select="$fill"/>; width: <xsl:value-of select="$bar_width"/>px; height: <xsl:value-of select="($iheight*number(.)) div $max"/>px;</xsl:attribute>
- </div>
- </td>
- </xsl:for-each>
- </tr>
- <tr>
- <xsl:for-each select="int">
- <td>
- <xsl:value-of select="@name"/>
- </td>
- </xsl:for-each>
- </tr>
- </tbody>
- </table>
- </xsl:template>
-
- <xsl:template name="keyvalue">
- <xsl:choose>
- <xsl:when test="@name">
- <tr>
- <td class="name">
- <xsl:value-of select="@name"/>
- </td>
- <td class="value">
- <xsl:value-of select="."/>
- </td>
- </tr>
- </xsl:when>
- <xsl:otherwise>
- <xsl:value-of select="."/>
- </xsl:otherwise>
- </xsl:choose>
- </xsl:template>
-
- <xsl:template match="int|bool|long|float|double|uuid|date">
- <xsl:call-template name="keyvalue"/>
- </xsl:template>
-
- <xsl:template match="arr">
- <tr>
- <td class="name">
- <xsl:value-of select="@name"/>
- </td>
- <td class="value">
- <ul>
- <xsl:for-each select="child::*">
- <li>
- <xsl:apply-templates/>
- </li>
- </xsl:for-each>
- </ul>
- </td>
- </tr>
- </xsl:template>
-
- <xsl:template match="str">
- <xsl:choose>
- <xsl:when test="@name='schema' or @name='index' or @name='flags'">
- <xsl:call-template name="schema"/>
- </xsl:when>
- <xsl:otherwise>
- <xsl:call-template name="keyvalue"/>
- </xsl:otherwise>
- </xsl:choose>
- </xsl:template>
-
- <xsl:template name="schema">
- <tr>
- <td class="name">
- <xsl:value-of select="@name"/>
- </td>
- <td class="value">
- <xsl:if test="contains(.,'unstored')">
- <xsl:value-of select="."/>
- </xsl:if>
- <xsl:if test="not(contains(.,'unstored'))">
- <xsl:call-template name="infochar2string">
- <xsl:with-param name="charList">
- <xsl:value-of select="."/>
- </xsl:with-param>
- </xsl:call-template>
- </xsl:if>
- </td>
- </tr>
- </xsl:template>
-
- <xsl:template name="infochar2string">
- <xsl:param name="i">1</xsl:param>
- <xsl:param name="charList"/>
-
- <xsl:variable name="char">
- <xsl:value-of select="substring($charList,$i,1)"/>
- </xsl:variable>
- <xsl:choose>
- <xsl:when test="$char='I'">
- <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='I']"/> - </xsl:when>
- <xsl:when test="$char='T'">
- <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='T']"/> - </xsl:when>
- <xsl:when test="$char='S'">
- <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='S']"/> - </xsl:when>
- <xsl:when test="$char='M'">
- <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='M']"/> - </xsl:when>
- <xsl:when test="$char='V'">
- <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='V']"/> - </xsl:when>
- <xsl:when test="$char='o'">
- <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='o']"/> - </xsl:when>
- <xsl:when test="$char='p'">
- <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='p']"/> - </xsl:when>
- <xsl:when test="$char='O'">
- <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='O']"/> - </xsl:when>
- <xsl:when test="$char='L'">
- <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='L']"/> - </xsl:when>
- <xsl:when test="$char='B'">
- <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='B']"/> - </xsl:when>
- <xsl:when test="$char='C'">
- <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='C']"/> - </xsl:when>
- <xsl:when test="$char='f'">
- <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='f']"/> - </xsl:when>
- <xsl:when test="$char='l'">
- <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='l']"/> -
- </xsl:when>
- </xsl:choose>
-
- <xsl:if test="not($i>=string-length($charList))">
- <xsl:call-template name="infochar2string">
- <xsl:with-param name="i">
- <xsl:value-of select="$i+1"/>
- </xsl:with-param>
- <xsl:with-param name="charList">
- <xsl:value-of select="$charList"/>
- </xsl:with-param>
- </xsl:call-template>
- </xsl:if>
- </xsl:template>
- <xsl:template name="css">
- <style type="text/css">
- <![CDATA[
- td.name {font-style: italic; font-size:80%; }
- .doc { margin: 0.5em; border: solid grey 1px; }
- .exp { display: none; font-family: monospace; white-space: pre; }
- div.histogram { background: none repeat scroll 0%; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial;}
- table.histogram { width: auto; vertical-align: bottom; }
- table.histogram td, table.histogram th { text-align: center; vertical-align: bottom; border-bottom: 1px solid #ff9933; width: auto; }
- ]]>
- </style>
- </xsl:template>
-</xsl:stylesheet>
+++ /dev/null
-<!--
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- -->
-
-<!--
- Simple transform of Solr query response into Solr Update XML compliant XML.
- When used in the xslt response writer you will get UpdaateXML as output.
- But you can also store a query response XML to disk and feed this XML to
- the XSLTUpdateRequestHandler to index the content. Provided as example only.
- See http://wiki.apache.org/solr/XsltUpdateRequestHandler for more info
- -->
-<xsl:stylesheet version='1.0' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>
- <xsl:output media-type="text/xml" method="xml" indent="yes"/>
-
- <xsl:template match='/'>
- <add>
- <xsl:apply-templates select="response/result/doc"/>
- </add>
- </xsl:template>
-
- <!-- Ignore score (makes no sense to index) -->
- <xsl:template match="doc/*[@name='score']" priority="100">
- </xsl:template>
-
- <xsl:template match="doc">
- <xsl:variable name="pos" select="position()"/>
- <doc>
- <xsl:apply-templates>
- <xsl:with-param name="pos"><xsl:value-of select="$pos"/></xsl:with-param>
- </xsl:apply-templates>
- </doc>
- </xsl:template>
-
- <!-- Flatten arrays to duplicate field lines -->
- <xsl:template match="doc/arr" priority="100">
- <xsl:variable name="fn" select="@name"/>
-
- <xsl:for-each select="*">
- <xsl:element name="field">
- <xsl:attribute name="name"><xsl:value-of select="$fn"/></xsl:attribute>
- <xsl:value-of select="."/>
- </xsl:element>
- </xsl:for-each>
- </xsl:template>
-
-
- <xsl:template match="doc/*">
- <xsl:variable name="fn" select="@name"/>
-
- <xsl:element name="field">
- <xsl:attribute name="name"><xsl:value-of select="$fn"/></xsl:attribute>
- <xsl:value-of select="."/>
- </xsl:element>
- </xsl:template>
-
- <xsl:template match="*"/>
-</xsl:stylesheet>
+++ /dev/null
-name=collection1
\ No newline at end of file
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!--
- This is an example of a simple "solr.xml" file for configuring one or
- more Solr Cores, as well as allowing Cores to be added, removed, and
- reloaded via HTTP requests.
-
- More information about options available in this configuration file,
- and Solr Core administration can be found online:
- http://wiki.apache.org/solr/CoreAdmin
--->
-
-<solr>
-
- <solrcloud>
- <str name="host">${host:}</str>
- <int name="hostPort">${jetty.port:8983}</int>
- <str name="hostContext">${hostContext:solr}</str>
- <int name="zkClientTimeout">${zkClientTimeout:15000}</int>
- <bool name="genericCoreNodeNames">${genericCoreNodeNames:true}</bool>
- </solrcloud>
-
- <shardHandlerFactory name="shardHandlerFactory"
- class="HttpShardHandlerFactory">
- <int name="socketTimeout">${socketTimeout:0}</int>
- <int name="connTimeout">${connTimeout:0}</int>
- </shardHandlerFactory>
-
-</solr>
+++ /dev/null
-# The number of milliseconds of each tick
-tickTime=2000
-# The number of ticks that the initial
-# synchronization phase can take
-initLimit=10
-# The number of ticks that can pass between
-# sending a request and getting an acknowledgement
-syncLimit=5
-
-# the directory where the snapshot is stored.
-# dataDir=/opt/zookeeper/data
-# NOTE: Solr defaults the dataDir to <solrHome>/zoo_data
-
-# the port at which the clients will connect
-# clientPort=2181
-# NOTE: Solr sets this based on zkRun / zkHost params
-
+++ /dev/null
-../../dist/solr-4.6.0.war
\ No newline at end of file
+++ /dev/null
-../scripts/zookeeper.sh
\ No newline at end of file