From: Dennis Schafroth Date: Fri, 29 Nov 2013 11:14:30 +0000 (+0100) Subject: Zookeeper setup X-Git-Tag: v0.6.0~10 X-Git-Url: http://jsfdemo.indexdata.com/cgi-bin?a=commitdiff_plain;h=df39a9c5d2f4247b0682f2c6cf0d64b22edfaadd;p=lui-solr.git Zookeeper setup --- diff --git a/zookeeper/.#make_same_host_config.sh b/zookeeper/.#make_same_host_config.sh new file mode 120000 index 0000000..da3e3cb --- /dev/null +++ b/zookeeper/.#make_same_host_config.sh @@ -0,0 +1 @@ +dennis@opencontent-solr.index.567:1381402202 \ No newline at end of file diff --git a/zookeeper/README.txt b/zookeeper/README.txt new file mode 100644 index 0000000..b95697f --- /dev/null +++ b/zookeeper/README.txt @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Solr example +------------ + +This directory contains an instance of the Jetty Servlet container setup to +run Solr using an example configuration. 
+ +To run this example: + + java -jar start.jar + +in this example directory, and when Solr is started connect to + + http://localhost:8983/solr/ + +To add documents to the index, use the post.jar (or post.sh script) in +the example/exampledocs subdirectory (while Solr is running), for example: + + cd exampledocs + java -jar post.jar *.xml +Or: sh post.sh *.xml + +For more information about this example please read... + + * example/solr/README.txt + For more information about the "Solr Home" and Solr specific configuration + * http://lucene.apache.org/solr/tutorial.html + For a Tutorial using this example configuration + * http://wiki.apache.org/solr/SolrResources + For a list of other tutorials and introductory articles. + +Notes About These Examples +-------------------------- + +* SolrHome * + +By default, start.jar starts Solr in Jetty using the default Solr Home +directory of "./solr/" (relative to the working directory of the servlet +container). To run other example configurations, you can specify the +solr.solr.home system property when starting jetty... + + java -Dsolr.solr.home=multicore -jar start.jar + java -Dsolr.solr.home=example-DIH/solr -jar start.jar + +* References to Jar Files Outside This Directory * + +Various example SolrHome dirs contained in this directory may use "<lib>" +statements in the solrconfig.xml file to reference plugin jars outside of +this directory for loading "contrib" plugins via relative paths. + +If you make a copy of this example server and wish to use the +ExtractingRequestHandler (SolrCell), DataImportHandler (DIH), UIMA, the +clustering component, or any other modules in "contrib", you will need to +copy the required jars or update the paths to those jars in your +solrconfig.xml. + +* Logging * + +By default, Jetty & Solr will log to the console and logs/solr.log. This can be convenient when +first getting started, but eventually you will want to log just to a file. 
To +configure logging, edit the log4j.properties file in "resources". + +It is also possible to setup log4j or other popular logging frameworks. + diff --git a/zookeeper/cloud-scripts/log4j.properties b/zookeeper/cloud-scripts/log4j.properties new file mode 100644 index 0000000..c581583 --- /dev/null +++ b/zookeeper/cloud-scripts/log4j.properties @@ -0,0 +1,8 @@ +# Logging level +log4j.rootLogger=INFO, stderr + +# log to stderr +log4j.appender.stderr = org.apache.log4j.ConsoleAppender +log4j.appender.stderr.Target = System.err +log4j.appender.stderr.layout = org.apache.log4j.PatternLayout +log4j.appender.stderr.layout.ConversionPattern=%-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %C; %m\n diff --git a/zookeeper/cloud-scripts/zkcli.bat b/zookeeper/cloud-scripts/zkcli.bat new file mode 100644 index 0000000..8232a72 --- /dev/null +++ b/zookeeper/cloud-scripts/zkcli.bat @@ -0,0 +1,11 @@ +REM You can override pass the following parameters to this script: +REM + +set JVM=java + +REM Find location of this script + +set SDIR=%~dp0 +if "%SDIR:~-1%"=="\" set SDIR=%SDIR:~0,-1% + +"%JVM%" -Dlog4j.configuration=file:%SDIR%\log4j.properties -classpath "%SDIR%\..\solr-webapp\webapp\WEB-INF\lib\*;%SDIR%\..\lib\ext\*" org.apache.solr.cloud.ZkCLI %* diff --git a/zookeeper/cloud-scripts/zkcli.sh b/zookeeper/cloud-scripts/zkcli.sh new file mode 100755 index 0000000..ab5da96 --- /dev/null +++ b/zookeeper/cloud-scripts/zkcli.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# You can override pass the following parameters to this script: +# + +JVM="java" + +# Find location of this script + +sdir="`dirname \"$0\"`" + +PATH=$JAVA_HOME/bin:$PATH $JVM -Dlog4j.configuration=file:$sdir/log4j.properties -classpath "$sdir/../solr-webapp/webapp/WEB-INF/lib/*:$sdir/../lib/ext/*" org.apache.solr.cloud.ZkCLI ${1+"$@"} + diff --git a/zookeeper/contexts/solr-jetty-context.xml b/zookeeper/contexts/solr-jetty-context.xml new file mode 100644 index 0000000..50978a3 --- /dev/null +++ 
b/zookeeper/contexts/solr-jetty-context.xml @@ -0,0 +1,8 @@ + + + + + /webapps/solr.war + /etc/webdefault.xml + /solr-webapp + diff --git a/zookeeper/etc/create-solrtest.keystore.sh b/zookeeper/etc/create-solrtest.keystore.sh new file mode 100755 index 0000000..d3decee --- /dev/null +++ b/zookeeper/etc/create-solrtest.keystore.sh @@ -0,0 +1,37 @@ +#!/bin/bash -ex + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +############ + +# This script shows how the solrtest.keystore file used for solr tests +# and these example configs was generated. +# +# Running this script should only be necessary if the keystore file +# needs to be replaced, which shouldn't be required until sometime around +# the year 4751. 
+# +# NOTE: the "-ext" option used in the "keytool" command requires that you have +# the java7 version of keytool, but the generated key will work with any +# version of java + +echo "### remove old keystore" +rm -f solrtest.keystore + +echo "### create keystore and keys" +keytool -keystore solrtest.keystore -storepass "secret" -alias solrtest -keypass "secret" -genkey -keyalg RSA -dname "cn=localhost, ou=SolrTest, o=lucene.apache.org, c=US" -ext "san=ip:127.0.0.1" -validity 999999 + + diff --git a/zookeeper/etc/jetty.xml b/zookeeper/etc/jetty.xml new file mode 100644 index 0000000..8e7093d --- /dev/null +++ b/zookeeper/etc/jetty.xml @@ -0,0 +1,205 @@ + + + + + + + + + + + + + + + + + + + + + 10 + 10000 + false + + + + + + + + + + + + + + log4j.configuration etc/log4j.properties + + + 50000 + 1500 + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + false + false + 1000 + false + false + + + + + + + + + + + + org.eclipse.jetty.server.webapp.ContainerIncludeJarPattern + .*/servlet-api-[^/]*\.jar$ + + + + + + + + + + + + + + + /contexts + 0 + + + + + + diff --git a/zookeeper/etc/logging.properties b/zookeeper/etc/logging.properties new file mode 100644 index 0000000..063b36f --- /dev/null +++ b/zookeeper/etc/logging.properties @@ -0,0 +1,38 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# To use this log config, start solr with the following system property: +# -Djava.util.logging.config.file=etc/logging.properties + +## Default global logging level: +.level = INFO + +## Log every update command (add, delete, commit, ...) +#org.apache.solr.update.processor.LogUpdateProcessor.level = FINE + +## Where to log (space separated list). +handlers = java.util.logging.FileHandler + +java.util.logging.FileHandler.level = FINE + +java.util.logging.FileHandler.formatter = java.util.logging.SimpleFormatter + +# 1 GB limit per file +java.util.logging.FileHandler.limit = 1073741824 + +# Log to the logs directory, with log files named solrxxx.log +java.util.logging.FileHandler.pattern = ./logs/solr%u.log \ No newline at end of file diff --git a/zookeeper/etc/solrtest.keystore b/zookeeper/etc/solrtest.keystore new file mode 100644 index 0000000..bcc6ec0 Binary files /dev/null and b/zookeeper/etc/solrtest.keystore differ diff --git a/zookeeper/etc/webdefault.xml b/zookeeper/etc/webdefault.xml new file mode 100644 index 0000000..213138b --- /dev/null +++ b/zookeeper/etc/webdefault.xml @@ -0,0 +1,527 @@ + + + + + + + + + + + + + + + + + + + + + + + Default web.xml file. 
+ This file is applied to a Web application before it's own WEB_INF/web.xml file + + + + + + + + org.eclipse.jetty.servlet.listener.ELContextCleaner + + + + + + + + org.eclipse.jetty.servlet.listener.IntrospectorCleaner + + + + + + + + + + + + + + + + + + + default + org.eclipse.jetty.servlet.DefaultServlet + + aliases + false + + + acceptRanges + true + + + dirAllowed + true + + + welcomeServlets + false + + + redirectWelcome + false + + + maxCacheSize + 256000000 + + + maxCachedFileSize + 200000000 + + + maxCachedFiles + 2048 + + + gzip + true + + + useFileMappedBuffer + true + + + + 0 + + + + default + / + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + jsp + org.apache.jasper.servlet.JspServlet + + logVerbosityLevel + DEBUG + + + fork + false + + + xpoweredBy + false + + + 0 + + + + jsp + *.jsp + *.jspf + *.jspx + *.xsp + *.JSP + *.JSPF + *.JSPX + *.XSP + + + + + + + + + + + + + + + + + + + + + + + + + + + + 30 + + + + + + + + + + + + + index.html + index.htm + index.jsp + + + + + + ar + ISO-8859-6 + + + be + ISO-8859-5 + + + bg + ISO-8859-5 + + + ca + ISO-8859-1 + + + cs + ISO-8859-2 + + + da + ISO-8859-1 + + + de + ISO-8859-1 + + + el + ISO-8859-7 + + + en + ISO-8859-1 + + + es + ISO-8859-1 + + + et + ISO-8859-1 + + + fi + ISO-8859-1 + + + fr + ISO-8859-1 + + + hr + ISO-8859-2 + + + hu + ISO-8859-2 + + + is + ISO-8859-1 + + + it + ISO-8859-1 + + + iw + ISO-8859-8 + + + ja + Shift_JIS + + + ko + EUC-KR + + + lt + ISO-8859-2 + + + lv + ISO-8859-2 + + + mk + ISO-8859-5 + + + nl + ISO-8859-1 + + + no + ISO-8859-1 + + + pl + ISO-8859-2 + + + pt + ISO-8859-1 + + + ro + ISO-8859-2 + + + ru + ISO-8859-5 + + + sh + ISO-8859-5 + + + sk + ISO-8859-2 + + + sl + ISO-8859-2 + + + sq + ISO-8859-2 + + + sr + ISO-8859-5 + + + sv + ISO-8859-1 + + + tr + ISO-8859-9 + + + uk + ISO-8859-5 + + + zh + GB2312 + + + zh_TW + Big5 + + + + + + Disable TRACE + 
/ + TRACE + + + + + + diff --git a/zookeeper/example-schemaless/solr/collection1/conf/currency.xml b/zookeeper/example-schemaless/solr/collection1/conf/currency.xml new file mode 100644 index 0000000..3a9c58a --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/zookeeper/example-schemaless/solr/collection1/conf/elevate.xml b/zookeeper/example-schemaless/solr/collection1/conf/elevate.xml new file mode 100644 index 0000000..25d5ceb --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_ca.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_ca.txt new file mode 100644 index 0000000..307a85f --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_fr.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_fr.txt new file mode 100644 index 0000000..f1bba51 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_fr.txt @@ -0,0 +1,15 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j +d +c +jusqu +quoiqu +lorsqu +puisqu diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_ga.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_ga.txt new file mode 100644 index 0000000..9ebe7fa --- /dev/null +++ 
b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_it.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_it.txt new file mode 100644 index 0000000..cac0409 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/hyphenations_ga.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/hyphenations_ga.txt new file mode 100644 index 0000000..4d2642c --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stemdict_nl.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stemdict_nl.txt new file mode 100644 index 0000000..4410729 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stoptags_ja.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stoptags_ja.txt new file mode 100644 index 0000000..71b7508 --- /dev/null +++ 
b/zookeeper/example-schemaless/solr/collection1/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. 
それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 0, 1, 2, 何, 数, 幾 +#名詞-数 +# +# noun-affix: noun affixes where the sub-classification is undefined +#名詞-非自立 +# +# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, +# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, +# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, +# わり, 割り, 割, ん-口語/, もん-口語/ +#名詞-非自立-一般 +# +# noun-affix-adverbial: noun affixes that can behave as adverbs. +# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, +# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, +# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, +# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, +# 儘, 侭, みぎり, 矢先 +#名詞-非自立-副詞可能 +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よう(だ) ("you(da)"). +# e.g. 
よう, やう, 様 (よう) +#名詞-非自立-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form な (aux "da"). +# e.g. みたい, ふう +#名詞-非自立-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#名詞-特殊 +# +# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. そう +#名詞-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#名詞-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, +# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 +#名詞-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. 君, 様, 著 +#名詞-接尾-人名 +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#名詞-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分け, 入り, 落ち, 買い +#名詞-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. そう +#名詞-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula だ ("da"). +# e.g. 的, げ, がち +#名詞-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 
後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) +#名詞-接尾-副詞可能 +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 +#名詞-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽し) さ, (考え) 方 +#名詞-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) +#名詞-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are +# semantically verb-like. +# e.g. ごらん, ご覧, 御覧, 頂戴 +#名詞-動詞非自立的 +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") +# is いわく ("iwaku"). +#名詞-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and +# behave like an adjective. +# e.g. 申し訳, 仕方, とんでも, 違い +#名詞-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) +#接頭詞-名詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by なる/なさる/くださる. +# e.g. お (読みなさい), お (座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. お (寒いですねえ), バカ (でかい) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 
約, およそ, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#動詞 +# +# verb-main: +#動詞-自立 +# +# verb-auxiliary: +#動詞-非自立 +# +# verb-suffix: +#動詞-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-非自立 +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. あいかわらず, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, +# な, する, だ, etc. +# e.g. こんなに, そんなに, あんなに, なにか, なんでも +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, +# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, +# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. が, けれども, そして, じゃあ, それどころか +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. から, が, で, と, に, へ, より, を, の, にて +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. 
という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, +# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, +# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, +# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, +# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, +# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, +# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, +# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, +# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, +# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. こそ, さえ, しか, すら, は, も, ぞ +助詞-係助詞 +# +# particle-adverbial: +# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, +# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, +# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, +# (å½¼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, +# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (松島) や +助詞-間投助詞 +# +# particle-coordinate: +# e.g. と, たり, だの, だり, とか, なり, や, やら +助詞-並立助詞 +# +# particle-final: +# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, +# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 +# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 +# 「(祈りが届いたせい) か (, 試験に合格した.)」 +# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 +# e.g. か +助詞-副助詞/並立助詞/終助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. 
+助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. に, と +助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, +# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい +#感動詞 +# +##### +# symbol: unclassified Symbols. +記号 +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [○◎@$〒→+] +記号-一般 +# +# symbol-comma: Commas +# e.g. [,、] +記号-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記号-句点 +# +# symbol-space: Full-width whitespace. +記号-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『【] +記号-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’”』」】] +記号-括弧閉 +# +# symbol-alphabetic: +#記号-アルファベット +# +##### +# other: unclassified other +#その他 +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (だ)ァ +その他-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. あの, うんと, えと +フィラー +# +##### +# non-verbal: non-verbal sound. +非言語音 +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ar.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ar.txt new file mode 100644 index 0000000..046829d --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +أ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_bg.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_bg.txt new file mode 100644 index 0000000..1ae4ba2 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ca.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ca.txt new file mode 100644 index 0000000..3da65de --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em 
+en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_cz.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_cz.txt new file mode 100644 index 0000000..53c6097 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeÅ¡ +budem +byli +jseÅ¡ +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naÅ¡i +napiÅ¡te +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +vÅ¡ak +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +jeÅ¡tě +až +bez +také +pouze +první +vaÅ¡e +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten 
+tak +má +při +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp +jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne +jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_da.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_da.txt new file mode 100644 index 0000000..a3ff5fe --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +pÃ¥ | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +nÃ¥r | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +ogsÃ¥ | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sÃ¥dan | such, like this/like that diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_de.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_de.txt new file mode 100644 index 0000000..f770384 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_el.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_el.txt new file mode 100644 index 0000000..232681f --- /dev/null +++ 
b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_en.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_en.txt new file mode 100644 index 0000000..2c164c0 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_es.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_es.txt new file mode 100644 index 0000000..2db1476 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_eu.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_eu.txt new file mode 100644 index 0000000..25f1db9 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fa.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fa.txt new file mode 100644 index 0000000..723641c --- /dev/null +++ 
b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ي' instead of 'ی' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير 
+جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fi.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fi.txt new file mode 100644 index 0000000..addad79 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne 
niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fr.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fr.txt new file mode 100644 index 0000000..20d12cb --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fr.txt @@ -0,0 +1,184 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +cela | that +celà | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ga.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ga.txt new file mode 
100644 index 0000000..9ff88d7 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_gl.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_gl.txt new file mode 100644 index 0000000..d8760b1 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns 
+vos +vosa +vosas +voso +vosos +vós diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hi.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hi.txt new file mode 100644 index 0000000..86286bb --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर 
+हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा +नहिं diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hu.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hu.txt new file mode 100644 index 0000000..1a96f1d --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. 
+ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hy.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hy.txt new file mode 100644 index 0000000..60c1c50 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. 
+Õ¡ÕµÕ¤ +Õ¡ÕµÕ¬ +Õ¡ÕµÕ¶ +Õ¡ÕµÕ½ +դու +դուք +Õ¥Õ´ +Õ¥Õ¶ +ենք +Õ¥Õ½ +եք +Õ§ +Õ§Õ« +Õ§Õ«Õ¶ +էինք +էիր +էիք +էր +Õ¨Õ½Õ¿ +Õ© +Õ« +Õ«Õ¶ +Õ«Õ½Õ¯ +իր +Õ¯Õ¡Õ´ +համար +Õ°Õ¥Õ¿ +Õ°Õ¥Õ¿Õ¸ +մենք +Õ´Õ¥Õ» +Õ´Õ« +Õ¶ +Õ¶Õ¡ +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +ÕºÕ«Õ¿Õ« +վրա +և diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_id.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_id.txt new file mode 100644 index 0000000..4617f83 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami 
+kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_it.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_it.txt new file mode 100644 index 0000000..4cb5b08 --- /dev/null +++ 
b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more +quale | 
who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ja.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ja.txt new file mode 100644 index 0000000..d4321be --- /dev/null +++ 
b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +の +に +は +を +た +が +で +て +と +し +れ +さ +ある +いる +も +する +から +な +こと +として +い +や +れる +など +なっ +ない +この +ため +その +あっ +よう +また +もの +という +あり +まで +られ +なる +へ +か +だ +これ +によって +により +おり +より +による +ず +なり +られる +において +ば +なかっ +なく +しかし +について +せ +だっ +その後 +できる +それ +う +ので +なお +のみ +でき +き +つ +における +および +いう +さらに +でも +ら +たり +その他 +に関する +たち +ます +ん +なら +に対して +特に +せる +及び +これら +とき +では +にて +ほか +ながら +うち +そして +とともに +ただし +かつて +それぞれ +または +お +ほど +ものの +に対する +ほとんど +と共に +といった +です +とも +ところ +ここ +##### End of file diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_lv.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_lv.txt new file mode 100644 index 0000000..e21a23c --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakÅ¡ +ārpus +augÅ¡pus +bez +caur +dēļ +gar +iekÅ¡ +iz +kopÅ¡ +labad +lejpus +lÄ«dz +no +otrpus +pa +par +pār +pēc +pie +pirms +pret +priekÅ¡ +starp +Å¡aipus +uz +viņpus +virs +virspus +zem +apakÅ¡pus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretÄ« +arÄ« +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droÅ¡i +diemžēl +nebÅ«t +ik +it +taču +nu +pat +tiklab +iekÅ¡pus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolÄ«dz +lÄ«dzko +tiklÄ«dz +jebÅ¡u +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +bÅ«t +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +bÅ«Å¡u +bÅ«si +bÅ«s +bÅ«sim +bÅ«siet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikÅ¡u +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapÅ¡u +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām +kļuvāt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi 
+kļūs +kļūsim +kļūsiet +# verbs +varēt +varēju +varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_nl.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_nl.txt new file mode 100644 index 0000000..f4d61f5 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. 
of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. 
of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_no.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_no.txt new file mode 100644 index 0000000..e76f36e --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmÃ¥l dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) 
+en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about * +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_pt.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_pt.txt new file mode 100644 index 0000000..276c1b4 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ro.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ro.txt new file mode 100644 index 0000000..4fdee90 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aÅ£i +au +avea +avem +aveÅ£i +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiÅ£i +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulÅ£i +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteÅ£i +spre +sub +sunt +suntem +sunteÅ£i +ta +tăi +tale +tău +te +Å£i +Å£ie +tine +toată +toate +tot +toÅ£i +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ru.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ru.txt new file mode 100644 index 0000000..6430769 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. 
suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. 
form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_sv.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_sv.txt new file mode 100644 index 0000000..22bddfd --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | sÃ¥ = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +pÃ¥ | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +sÃ¥ | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +dÃ¥ | then, when +sin | his +nu | now +har | have +inte | inte nÃ¥gon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +nÃ¥got | some etc +frÃ¥n | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +nÃ¥gon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +Ã¥t | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +nÃ¥gra | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sÃ¥dan | such a +vÃ¥r | our +blivit | from bli +dess | its +inom | within +mellan | between +sÃ¥dant | such a 
+varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sÃ¥dana | such a +vart | each +dina | thy +vars | whose +vÃ¥rt | our +vÃ¥ra | our +ert | your +era | your +vilkas | whose + diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_th.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_th.txt new file mode 100644 index 0000000..07f0fab --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +แห่ง +แล้ว +และ +แรก +แบบ +แต่ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นการ +เป็น +เปิดเผย +เปิด +เนื่องจาก +เดียวกัน +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีก +อาจ +อะไร +ออก +อย่าง +อยู่ +อยาก +หาก +หลาย +หลังจาก +หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สําหรับ +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาก +มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นํา +นั้น +นัก +นอกจาก +ทุก +ที่สุด +ที่ +ทําให้ +ทํา +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูก +ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งแต่ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาก +จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +ก่อน +ก็ +การ +กับ +กัน +กว่า +กล่าว diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_tr.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_tr.txt new file mode 100644 index 0000000..84d9408 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beş +bile +bin 
+bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/userdict_ja.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/userdict_ja.txt new file mode 100644 index 0000000..6f0368e --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. 
This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same <text> is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/zookeeper/example-schemaless/solr/collection1/conf/protwords.txt b/zookeeper/example-schemaless/solr/collection1/conf/protwords.txt new file mode 100644 index 0000000..1dfc0ab --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed.
+dontstems +zwhacky + diff --git a/zookeeper/example-schemaless/solr/collection1/conf/schema.xml b/zookeeper/example-schemaless/solr/collection1/conf/schema.xml new file mode 100644 index 0000000..a157715 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/schema.xml @@ -0,0 +1,1072 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/zookeeper/example-schemaless/solr/collection1/conf/solrconfig.xml b/zookeeper/example-schemaless/solr/collection1/conf/solrconfig.xml new file mode 100644 index 0000000..d9c941f --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/solrconfig.xml @@ -0,0 +1,1888 @@ + + + + + + + + + 4.4 + + + + + 
+ + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + true + managed-schema + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.lock.type:native} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + 15000 + false + + + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + + + + + false + + + 2 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + add-unknown-fields-to-the-schema + + + + + + + application/json + add-unknown-fields-to-the-schema + + + + + application/csv + add-unknown-fields-to-the-schema + + + + + + + true + ignored_ + + + true + links + ignored_ + + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + text_general + + + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + 
+ + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm:ss,SSSZ + yyyy-MM-dd'T'HH:mm:ss.SSS + yyyy-MM-dd'T'HH:mm:ss,SSS + yyyy-MM-dd'T'HH:mm:ssZ + yyyy-MM-dd'T'HH:mm:ss + yyyy-MM-dd'T'HH:mmZ + yyyy-MM-dd'T'HH:mm + yyyy-MM-dd HH:mm:ss.SSSZ + yyyy-MM-dd HH:mm:ss,SSSZ + yyyy-MM-dd HH:mm:ss.SSS + yyyy-MM-dd HH:mm:ss,SSS + yyyy-MM-dd HH:mm:ssZ + yyyy-MM-dd HH:mm:ss + yyyy-MM-dd HH:mmZ + yyyy-MM-dd HH:mm + yyyy-MM-dd + + + + text_general + + java.lang.Boolean + booleans + + + java.util.Date + tdates + + + java.lang.Long + java.lang.Integer + tlongs + + + java.lang.Number + tdoubles + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/zookeeper/example-schemaless/solr/collection1/conf/stopwords.txt b/zookeeper/example-schemaless/solr/collection1/conf/stopwords.txt new file mode 100644 index 0000000..ae1e83e --- /dev/null +++ 
b/zookeeper/example-schemaless/solr/collection1/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/zookeeper/example-schemaless/solr/collection1/conf/synonyms.txt b/zookeeper/example-schemaless/solr/collection1/conf/synonyms.txt new file mode 100644 index 0000000..7f72128 --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/zookeeper/example-schemaless/solr/collection1/core.properties b/zookeeper/example-schemaless/solr/collection1/core.properties new file mode 100644 index 0000000..bc0cf7d --- /dev/null +++ b/zookeeper/example-schemaless/solr/collection1/core.properties @@ -0,0 +1 @@ +name=collection1 \ No newline at end of file diff --git a/zookeeper/exampledocs/books.csv b/zookeeper/exampledocs/books.csv new file mode 100644 index 0000000..8ccecbb --- /dev/null +++ b/zookeeper/exampledocs/books.csv @@ -0,0 +1,11 @@ +id,cat,name,price,inStock,author,series_t,sequence_i,genre_s +0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,"A Song of Ice and Fire",1,fantasy +0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,"A Song of Ice and Fire",2,fantasy +055357342X,book,A Storm of Swords,7.99,true,George R.R. 
Martin,"A Song of Ice and Fire",3,fantasy +0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi +0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy +0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi +0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy +0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy +0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy +080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy diff --git a/zookeeper/exampledocs/books.json b/zookeeper/exampledocs/books.json new file mode 100644 index 0000000..f82d510 --- /dev/null +++ b/zookeeper/exampledocs/books.json @@ -0,0 +1,51 @@ +[ + { + "id" : "978-0641723445", + "cat" : ["book","hardcover"], + "name" : "The Lightning Thief", + "author" : "Rick Riordan", + "series_t" : "Percy Jackson and the Olympians", + "sequence_i" : 1, + "genre_s" : "fantasy", + "inStock" : true, + "price" : 12.50, + "pages_i" : 384 + } +, + { + "id" : "978-1423103349", + "cat" : ["book","paperback"], + "name" : "The Sea of Monsters", + "author" : "Rick Riordan", + "series_t" : "Percy Jackson and the Olympians", + "sequence_i" : 2, + "genre_s" : "fantasy", + "inStock" : true, + "price" : 6.49, + "pages_i" : 304 + } +, + { + "id" : "978-1857995879", + "cat" : ["book","paperback"], + "name" : "Sophie's World : The Greek Philosophers", + "author" : "Jostein Gaarder", + "sequence_i" : 1, + "genre_s" : "fantasy", + "inStock" : true, + "price" : 3.07, + "pages_i" : 64 + } +, + { + "id" : "978-1933988177", + "cat" : ["book","paperback"], + "name" : "Lucene in Action, Second Edition", + "author" : "Michael McCandless", + "sequence_i" : 1, + "genre_s" : "IT", + "inStock" : true, + "price" : 30.50, + "pages_i" : 475 + } +] diff --git a/zookeeper/exampledocs/gb18030-example.xml 
b/zookeeper/exampledocs/gb18030-example.xml new file mode 100644 index 0000000..769be19 --- /dev/null +++ b/zookeeper/exampledocs/gb18030-example.xml @@ -0,0 +1,32 @@ + + + + + + GB18030TEST + Test with some GB18030 encoded characters + No accents here + 这是一个功能 + This is a feature (translated) + 这份文件是很有光泽 + This document is very shiny (translated) + 0 + true + + + diff --git a/zookeeper/exampledocs/hd.xml b/zookeeper/exampledocs/hd.xml new file mode 100644 index 0000000..3c5448d --- /dev/null +++ b/zookeeper/exampledocs/hd.xml @@ -0,0 +1,56 @@ + + + + + SP2514N + Samsung SpinPoint P120 SP2514N - hard drive - 250 GB - ATA-133 + Samsung Electronics Co. Ltd. + + samsung + electronics + hard drive + 7200RPM, 8MB cache, IDE Ultra ATA-133 + NoiseGuard, SilentSeek technology, Fluid Dynamic Bearing (FDB) motor + 92 + 6 + true + 2006-02-13T15:26:37Z + + 35.0752,-97.032 + + + + 6H500F0 + Maxtor DiamondMax 11 - hard drive - 500 GB - SATA-300 + Maxtor Corp. + + maxtor + electronics + hard drive + SATA 3.0Gb/s, NCQ + 8.5ms seek + 16MB cache + 350 + 6 + true + + 45.17614,-93.87341 + 2006-02-13T15:26:37Z + + + diff --git a/zookeeper/exampledocs/ipod_other.xml b/zookeeper/exampledocs/ipod_other.xml new file mode 100644 index 0000000..7756c9f --- /dev/null +++ b/zookeeper/exampledocs/ipod_other.xml @@ -0,0 +1,60 @@ + + + + + + F8V7067-APL-KIT + Belkin Mobile Power Cord for iPod w/ Dock + Belkin + + belkin + electronics + connector + car power adapter, white + 4 + 19.95 + 1 + false + + 45.18014,-93.87741 + 2005-08-01T16:30:25Z + + + + IW-02 + iPod & iPod Mini USB 2.0 Cable + Belkin + + belkin + electronics + connector + car power adapter for iPod, white + 2 + 11.50 + 1 + false + + 37.7752,-122.4232 + 2006-02-14T23:55:59Z + + + + + + + diff --git a/zookeeper/exampledocs/ipod_video.xml b/zookeeper/exampledocs/ipod_video.xml new file mode 100644 index 0000000..1ca5f6f --- /dev/null +++ b/zookeeper/exampledocs/ipod_video.xml @@ -0,0 +1,40 @@ + + + + MA147LL/A + Apple 60 GB
iPod with Video Playback Black + Apple Computer Inc. + + apple + electronics + music + iTunes, Podcasts, Audiobooks + Stores up to 15,000 songs, 25,000 photos, or 150 hours of video + 2.5-inch, 320x240 color TFT LCD display with LED backlight + Up to 20 hours of battery life + Plays AAC, MP3, WAV, AIFF, Audible, Apple Lossless, H.264 video + Notes, Calendar, Phone book, Hold button, Date display, Photo wallet, Built-in games, JPEG photo playback, Upgradeable firmware, USB 2.0 compatibility, Playback speed control, Rechargeable capability, Battery level indication + earbud headphones, USB cable + 5.5 + 399.00 + 10 + true + + 37.7752,-100.0232 + 2005-10-12T08:00:00Z + diff --git a/zookeeper/exampledocs/manufacturers.xml b/zookeeper/exampledocs/manufacturers.xml new file mode 100644 index 0000000..e3121d5 --- /dev/null +++ b/zookeeper/exampledocs/manufacturers.xml @@ -0,0 +1,75 @@ + + + + + adata + A-Data Technology + 46221 Landing Parkway Fremont, CA 94538 + + + apple + Apple + 1 Infinite Way, Cupertino CA + + + asus + ASUS Computer + 800 Corporate Way Fremont, CA 94539 + + + ati + ATI Technologies + 33 Commerce Valley Drive East Thornhill, ON L3T 7N6 Canada + + + belkin + Belkin + 12045 E. Waterfront Drive Playa Vista, CA 90094 + + + canon + Canon, Inc. + One Canon Plaza Lake Success, NY 11042 + + + corsair + Corsair Microsystems + 46221 Landing Parkway Fremont, CA 94538 + + + dell + Dell, Inc. + One Dell Way Round Rock, Texas 78682 + + + maxtor + Maxtor Corporation + 920 Disc Drive Scotts Valley, CA 95066 + + + samsung + Samsung Electronics Co. Ltd. + 105 Challenger Rd. 
Ridgefield Park, NJ 07660-0511 + + + viewsonic + ViewSonic Corp + 381 Brea Canyon Road Walnut, CA 91789-0708 + + + diff --git a/zookeeper/exampledocs/mem.xml b/zookeeper/exampledocs/mem.xml new file mode 100644 index 0000000..0b89d67 --- /dev/null +++ b/zookeeper/exampledocs/mem.xml @@ -0,0 +1,77 @@ + + + + + TWINX2048-3200PRO + CORSAIR XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) Dual Channel Kit System Memory - Retail + Corsair Microsystems Inc. + + corsair + electronics + memory + CAS latency 2, 2-3-3-6 timing, 2.75v, unbuffered, heat-spreader + 185 + 5 + true + + 37.7752,-122.4232 + 2006-02-13T15:26:37Z + + + electronics|6.0 memory|3.0 + + + + VS1GB400C3 + CORSAIR ValueSelect 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - Retail + Corsair Microsystems Inc. + + corsair + electronics + memory + 74.99 + 7 + true + + 37.7752,-100.0232 + 2006-02-13T15:26:37Z + + electronics|4.0 memory|2.0 + + + + VDBDB1A16 + A-DATA V-Series 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - OEM + A-DATA Technology Inc. + + corsair + electronics + memory + CAS latency 3, 2.7v + + 0 + true + + 45.18414,-93.88141 + 2006-02-13T15:26:37Z + + electronics|0.9 memory|0.1 + + + + diff --git a/zookeeper/exampledocs/money.xml b/zookeeper/exampledocs/money.xml new file mode 100644 index 0000000..b1b8036 --- /dev/null +++ b/zookeeper/exampledocs/money.xml @@ -0,0 +1,65 @@ + + + + + + USD + One Dollar + Bank of America + boa + currency + Coins and notes + 1,USD + true + + + + EUR + One Euro + European Union + eu + currency + Coins and notes + 1,EUR + true + + + + GBP + One British Pound + U.K. 
+ uk + currency + Coins and notes + 1,GBP + true + + + + NOK + One Krone + Bank of Norway + nor + currency + Coins and notes + 1,NOK + true + + + + diff --git a/zookeeper/exampledocs/monitor.xml b/zookeeper/exampledocs/monitor.xml new file mode 100644 index 0000000..db986fa --- /dev/null +++ b/zookeeper/exampledocs/monitor.xml @@ -0,0 +1,35 @@ + + + + 3007WFP + Dell Widescreen UltraSharp 3007WFP + Dell, Inc. + + dell + electronics + monitor + 30" TFT active matrix LCD, 2560 x 1600, .25mm dot pitch, 700:1 contrast + USB cable + 401.6 + 2199 + 6 + true + + 43.17614,-90.57341 + + diff --git a/zookeeper/exampledocs/monitor2.xml b/zookeeper/exampledocs/monitor2.xml new file mode 100644 index 0000000..79b9949 --- /dev/null +++ b/zookeeper/exampledocs/monitor2.xml @@ -0,0 +1,34 @@ + + + + VA902B + ViewSonic VA902B - flat panel display - TFT - 19" + ViewSonic Corp. + + viewsonic + electronics + monitor + 19" TFT active matrix LCD, 8ms response time, 1280 x 1024 native resolution + 190.4 + 279.95 + 6 + true + + 45.18814,-93.88541 + + diff --git a/zookeeper/exampledocs/mp500.xml b/zookeeper/exampledocs/mp500.xml new file mode 100644 index 0000000..bab401a --- /dev/null +++ b/zookeeper/exampledocs/mp500.xml @@ -0,0 +1,43 @@ + + + + 0579B002 + Canon PIXMA MP500 All-In-One Photo Printer + Canon Inc. 
+ + canon + electronics + multifunction printer + printer + scanner + copier + Multifunction ink-jet color photo printer + Flatbed scanner, optical scan resolution of 1,200 x 2,400 dpi + 2.5" color LCD preview screen + Duplex Copying + Printing speed up to 29ppm black, 19ppm color + Hi-Speed USB + memory card: CompactFlash, Micro Drive, SmartMedia, Memory Stick, Memory Stick Pro, SD Card, and MultiMediaCard + 352 + 179.99 + 6 + true + + 45.19214,-93.89941 + + diff --git a/zookeeper/exampledocs/post.jar b/zookeeper/exampledocs/post.jar new file mode 100644 index 0000000..0042a46 Binary files /dev/null and b/zookeeper/exampledocs/post.jar differ diff --git a/zookeeper/exampledocs/post.sh b/zookeeper/exampledocs/post.sh new file mode 100755 index 0000000..113884d --- /dev/null +++ b/zookeeper/exampledocs/post.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +FILES=$* +URL=http://localhost:8983/solr/update + +for f in $FILES; do + echo Posting file $f to $URL + curl $URL --data-binary @$f -H 'Content-type:application/xml' + echo +done + +#send the commit command to make sure all the changes are flushed and visible +#curl $URL --data-binary '' -H 'Content-type:application/xml' + +curl "$URL?softCommit=true" +echo diff --git a/zookeeper/exampledocs/sd500.xml b/zookeeper/exampledocs/sd500.xml new file mode 100644 index 0000000..145c6fd --- /dev/null +++ b/zookeeper/exampledocs/sd500.xml @@ -0,0 +1,38 @@ + + + + 9885A004 + Canon PowerShot SD500 + Canon Inc. + + canon + electronics + camera + 3x zoop, 7.1 megapixel Digital ELPH + movie clips up to 640x480 @30 fps + 2.0" TFT LCD, 118,000 pixels + built in flash, red-eye reduction + 32MB SD card, USB cable, AV cable, battery + 6.4 + 329.95 + 7 + true + 2006-02-13T15:26:37Z + + 45.19614,-93.90341 + diff --git a/zookeeper/exampledocs/solr.xml b/zookeeper/exampledocs/solr.xml new file mode 100644 index 0000000..410e5f7 --- /dev/null +++ b/zookeeper/exampledocs/solr.xml @@ -0,0 +1,38 @@ + + + + + SOLR1000 + Solr, the Enterprise Search Server + Apache Software Foundation + software + search + Advanced Full-Text Search Capabilities using Lucene + Optimized for High Volume Web Traffic + Standards Based Open Interfaces - XML and HTTP + Comprehensive HTML Administration Interfaces + Scalability - Efficient Replication to other Solr Search Servers + Flexible and Adaptable with XML configuration and Schema + Good unicode support: héllo (hello with an accent over the e) + 0 + 10 + true + 2006-01-17T00:00:00.000Z + + + diff --git a/zookeeper/exampledocs/test_utf8.sh b/zookeeper/exampledocs/test_utf8.sh new file mode 100755 index 0000000..edfd972 --- /dev/null +++ b/zookeeper/exampledocs/test_utf8.sh @@ -0,0 +1,93 @@ +#!/bin/sh +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#Test script to tell if the server is accepting UTF-8 +#The python writer currently escapes non-ascii chars, so it's good for testing + +URL=http://localhost:8983/solr + +if [ ! -z $1 ]; then + URL=$1 +fi + +curl "$URL/select?q=hello&params=explicit&wt=python" 2> /dev/null | grep 'hello' > /dev/null 2>&1 +if [ $? = 0 ]; then + echo "Solr server is up." +else + echo "ERROR: Could not curl to Solr - is curl installed? Is Solr not running?" + exit 1 +fi + +curl "$URL/select?q=h%C3%A9llo&echoParams=explicit&wt=python" 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1 +if [ $? = 0 ]; then + echo "HTTP GET is accepting UTF-8" +else + echo "ERROR: HTTP GET is not accepting UTF-8" +fi + +curl $URL/select --data-binary 'q=h%C3%A9llo&echoParams=explicit&wt=python' -H 'Content-type:application/x-www-form-urlencoded; charset=UTF-8' 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1 +if [ $? = 0 ]; then + echo "HTTP POST is accepting UTF-8" +else + echo "ERROR: HTTP POST is not accepting UTF-8" +fi + +curl $URL/select --data-binary 'q=h%C3%A9llo&echoParams=explicit&wt=python' 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1 +if [ $?
= 0 ]; then + echo "HTTP POST defaults to UTF-8" +else + echo "HTTP POST does not default to UTF-8" +fi + + +#A unicode character outside of the BMP (a circle with an x inside) +CHAR="𐌈" +CODEPOINT='0x10308' +#URL encoded UTF8 of the codepoint +URL_UTF8='%F0%90%8C%88' +#expected return of the python writer (currently uses UTF-16 surrogates) +EXPECTED='\\ud800\\udf08' + +curl "$URL/select?q=$URL_UTF8&echoParams=explicit&wt=python" 2> /dev/null | grep $EXPECTED > /dev/null 2>&1 +if [ $? = 0 ]; then + echo "HTTP GET is accepting UTF-8 beyond the basic multilingual plane" +else + echo "ERROR: HTTP GET is not accepting UTF-8 beyond the basic multilingual plane" +fi + +curl $URL/select --data-binary "q=$URL_UTF8&echoParams=explicit&wt=python" -H 'Content-type:application/x-www-form-urlencoded; charset=UTF-8' 2> /dev/null | grep $EXPECTED > /dev/null 2>&1 +if [ $? = 0 ]; then + echo "HTTP POST is accepting UTF-8 beyond the basic multilingual plane" +else + echo "ERROR: HTTP POST is not accepting UTF-8 beyond the basic multilingual plane" +fi + +curl "$URL/select?q=$URL_UTF8&echoParams=explicit&wt=python" --data-binary '' 2> /dev/null | grep $EXPECTED > /dev/null 2>&1 +if [ $? = 0 ]; then + echo "HTTP POST + URL params is accepting UTF-8 beyond the basic multilingual plane" +else + echo "ERROR: HTTP POST + URL params is not accepting UTF-8 beyond the basic multilingual plane" +fi + +#curl "$URL/select?q=$URL_UTF8&echoParams=explicit&wt=json" 2> /dev/null | od -tx1 -w1000 | sed 's/ //g' | grep 'f4808198' > /dev/null 2>&1 +curl "$URL/select?q=$URL_UTF8&echoParams=explicit&wt=json" 2> /dev/null | grep "$CHAR" > /dev/null 2>&1 +if [ $? 
= 0 ]; then + echo "Response correctly returns UTF-8 beyond the basic multilingual plane" +else + echo "ERROR: Response can't return UTF-8 beyond the basic multilingual plane" +fi + + diff --git a/zookeeper/exampledocs/utf8-example.xml b/zookeeper/exampledocs/utf8-example.xml new file mode 100644 index 0000000..c9486b2 --- /dev/null +++ b/zookeeper/exampledocs/utf8-example.xml @@ -0,0 +1,42 @@ + + + + + + + + UTF8TEST + Test with some UTF-8 encoded characters + Apache Software Foundation + software + search + No accents here + This is an e acute: é + eaiou with circumflexes: êâîôû + eaiou with umlauts: ëäïöü + tag with escaped chars: <nicetag/> + escaped ampersand: Bonnie & Clyde + Outside the BMP:𐌈 codepoint=10308, a circle with an x inside. UTF8=f0908c88 UTF16=d800 df08 + 0 + true + + + diff --git a/zookeeper/exampledocs/vidcard.xml b/zookeeper/exampledocs/vidcard.xml new file mode 100644 index 0000000..10b8121 --- /dev/null +++ b/zookeeper/exampledocs/vidcard.xml @@ -0,0 +1,62 @@ + + + + + EN7800GTX/2DHTV/256M + ASUS Extreme N7800GTX/2DHTV (256 MB) + + ASUS Computer Inc. 
+ + asus + electronics + graphics card + NVIDIA GeForce 7800 GTX GPU/VPU clocked at 486MHz + 256MB GDDR3 Memory clocked at 1.35GHz + PCI Express x16 + Dual DVI connectors, HDTV out, video input + OpenGL 2.0, DirectX 9.0 + 16 + 479.95 + 7 + 40.7143,-74.006 + false + 2006-02-13T15:26:37Z/DAY + + + + 100-435805 + ATI Radeon X1900 XTX 512 MB PCIE Video Card + ATI Technologies + + ati + electronics + graphics card + ATI RADEON X1900 GPU/VPU clocked at 650MHz + 512MB GDDR3 SDRAM clocked at 1.55GHz + PCI Express x16 + dual DVI, HDTV, svideo, composite out + OpenGL 2.0, DirectX 9.0 + 48 + 649.99 + 7 + false + 2006-02-13T15:26:37Z/DAY + + 40.7143,-74.006 + + diff --git a/zookeeper/lib/ext/jcl-over-slf4j-1.6.6.jar b/zookeeper/lib/ext/jcl-over-slf4j-1.6.6.jar new file mode 100644 index 0000000..ab898c0 Binary files /dev/null and b/zookeeper/lib/ext/jcl-over-slf4j-1.6.6.jar differ diff --git a/zookeeper/lib/ext/jul-to-slf4j-1.6.6.jar b/zookeeper/lib/ext/jul-to-slf4j-1.6.6.jar new file mode 100644 index 0000000..fa8640f Binary files /dev/null and b/zookeeper/lib/ext/jul-to-slf4j-1.6.6.jar differ diff --git a/zookeeper/lib/ext/log4j-1.2.16.jar b/zookeeper/lib/ext/log4j-1.2.16.jar new file mode 100644 index 0000000..5429a90 Binary files /dev/null and b/zookeeper/lib/ext/log4j-1.2.16.jar differ diff --git a/zookeeper/lib/ext/slf4j-api-1.6.6.jar b/zookeeper/lib/ext/slf4j-api-1.6.6.jar new file mode 100644 index 0000000..4c03fa6 Binary files /dev/null and b/zookeeper/lib/ext/slf4j-api-1.6.6.jar differ diff --git a/zookeeper/lib/ext/slf4j-log4j12-1.6.6.jar b/zookeeper/lib/ext/slf4j-log4j12-1.6.6.jar new file mode 100644 index 0000000..e72c2d6 Binary files /dev/null and b/zookeeper/lib/ext/slf4j-log4j12-1.6.6.jar differ diff --git a/zookeeper/lib/jetty-continuation-8.1.10.v20130312.jar b/zookeeper/lib/jetty-continuation-8.1.10.v20130312.jar new file mode 100644 index 0000000..c19fda2 Binary files /dev/null and b/zookeeper/lib/jetty-continuation-8.1.10.v20130312.jar differ diff --git 
a/zookeeper/lib/jetty-deploy-8.1.10.v20130312.jar b/zookeeper/lib/jetty-deploy-8.1.10.v20130312.jar new file mode 100644 index 0000000..986513b Binary files /dev/null and b/zookeeper/lib/jetty-deploy-8.1.10.v20130312.jar differ diff --git a/zookeeper/lib/jetty-http-8.1.10.v20130312.jar b/zookeeper/lib/jetty-http-8.1.10.v20130312.jar new file mode 100644 index 0000000..e0fecc5 Binary files /dev/null and b/zookeeper/lib/jetty-http-8.1.10.v20130312.jar differ diff --git a/zookeeper/lib/jetty-io-8.1.10.v20130312.jar b/zookeeper/lib/jetty-io-8.1.10.v20130312.jar new file mode 100644 index 0000000..e686933 Binary files /dev/null and b/zookeeper/lib/jetty-io-8.1.10.v20130312.jar differ diff --git a/zookeeper/lib/jetty-jmx-8.1.10.v20130312.jar b/zookeeper/lib/jetty-jmx-8.1.10.v20130312.jar new file mode 100644 index 0000000..568afb3 Binary files /dev/null and b/zookeeper/lib/jetty-jmx-8.1.10.v20130312.jar differ diff --git a/zookeeper/lib/jetty-security-8.1.10.v20130312.jar b/zookeeper/lib/jetty-security-8.1.10.v20130312.jar new file mode 100644 index 0000000..4a3054e Binary files /dev/null and b/zookeeper/lib/jetty-security-8.1.10.v20130312.jar differ diff --git a/zookeeper/lib/jetty-server-8.1.10.v20130312.jar b/zookeeper/lib/jetty-server-8.1.10.v20130312.jar new file mode 100644 index 0000000..e563ec5 Binary files /dev/null and b/zookeeper/lib/jetty-server-8.1.10.v20130312.jar differ diff --git a/zookeeper/lib/jetty-servlet-8.1.10.v20130312.jar b/zookeeper/lib/jetty-servlet-8.1.10.v20130312.jar new file mode 100644 index 0000000..1f13d52 Binary files /dev/null and b/zookeeper/lib/jetty-servlet-8.1.10.v20130312.jar differ diff --git a/zookeeper/lib/jetty-util-8.1.10.v20130312.jar b/zookeeper/lib/jetty-util-8.1.10.v20130312.jar new file mode 100644 index 0000000..018b2ea Binary files /dev/null and b/zookeeper/lib/jetty-util-8.1.10.v20130312.jar differ diff --git a/zookeeper/lib/jetty-webapp-8.1.10.v20130312.jar b/zookeeper/lib/jetty-webapp-8.1.10.v20130312.jar new file 
mode 100644 index 0000000..c47b968 Binary files /dev/null and b/zookeeper/lib/jetty-webapp-8.1.10.v20130312.jar differ diff --git a/zookeeper/lib/jetty-xml-8.1.10.v20130312.jar b/zookeeper/lib/jetty-xml-8.1.10.v20130312.jar new file mode 100644 index 0000000..039702f Binary files /dev/null and b/zookeeper/lib/jetty-xml-8.1.10.v20130312.jar differ diff --git a/zookeeper/lib/servlet-api-3.0.jar b/zookeeper/lib/servlet-api-3.0.jar new file mode 100644 index 0000000..b135409 Binary files /dev/null and b/zookeeper/lib/servlet-api-3.0.jar differ diff --git a/zookeeper/multicore/README.txt b/zookeeper/multicore/README.txt new file mode 100644 index 0000000..eba1457 --- /dev/null +++ b/zookeeper/multicore/README.txt @@ -0,0 +1,7 @@ +This is an alternative setup structure to support multiple cores. + +To run this configuration, start jetty in the example/ directory using: + +java -Dsolr.solr.home=multicore -jar start.jar + +For general examples on standard solr configuration, see the "solr" directory. 
diff --git a/zookeeper/multicore/core0/conf/schema.xml b/zookeeper/multicore/core0/conf/schema.xml new file mode 100644 index 0000000..7401b5b --- /dev/null +++ b/zookeeper/multicore/core0/conf/schema.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + id + + + name + + + + + diff --git a/zookeeper/multicore/core0/conf/solrconfig.xml b/zookeeper/multicore/core0/conf/solrconfig.xml new file mode 100644 index 0000000..1eb29c6 --- /dev/null +++ b/zookeeper/multicore/core0/conf/solrconfig.xml @@ -0,0 +1,95 @@ + + + + + + 4.4 + + + + ${solr.core0.data.dir:} + + + + + + + ${solr.core0.data.dir:} + + + + + + + true + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + solr + + + + diff --git a/zookeeper/multicore/core1/conf/schema.xml b/zookeeper/multicore/core1/conf/schema.xml new file mode 100644 index 0000000..5a27d39 --- /dev/null +++ b/zookeeper/multicore/core1/conf/schema.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + id + + + name + + + + + diff --git a/zookeeper/multicore/core1/conf/solrconfig.xml b/zookeeper/multicore/core1/conf/solrconfig.xml new file mode 100644 index 0000000..c0aff09 --- /dev/null +++ b/zookeeper/multicore/core1/conf/solrconfig.xml @@ -0,0 +1,95 @@ + + + + + + 4.4 + + + + ${solr.core1.data.dir:} + + + + + + + ${solr.core1.data.dir:} + + + + + + + true + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + solr + + + + diff --git a/zookeeper/multicore/exampledocs/ipod_other.xml b/zookeeper/multicore/exampledocs/ipod_other.xml new file mode 100644 index 0000000..4bfa310 --- /dev/null +++ b/zookeeper/multicore/exampledocs/ipod_other.xml @@ -0,0 +1,34 @@ + + + + + + F8V7067-APL-KIT + Belkin Mobile Power Cord for iPod w/ Dock + + + + IW-02 + iPod & iPod Mini USB 2.0 Cable + + + + + + + diff --git a/zookeeper/multicore/exampledocs/ipod_video.xml b/zookeeper/multicore/exampledocs/ipod_video.xml new file mode 100644 index 0000000..3547fd3 --- /dev/null +++ 
b/zookeeper/multicore/exampledocs/ipod_video.xml @@ -0,0 +1,22 @@ + + + + MA147LL/A + Apple 60 GB iPod with Video Playback Black + + diff --git a/zookeeper/multicore/solr.xml b/zookeeper/multicore/solr.xml new file mode 100644 index 0000000..2707901 --- /dev/null +++ b/zookeeper/multicore/solr.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + diff --git a/zookeeper/multicore/zoo.cfg b/zookeeper/multicore/zoo.cfg new file mode 100644 index 0000000..aea4518 --- /dev/null +++ b/zookeeper/multicore/zoo.cfg @@ -0,0 +1,17 @@ +# The number of milliseconds of each tick +tickTime=2000 +# The number of ticks that the initial +# synchronization phase can take +initLimit=10 +# The number of ticks that can pass between +# sending a request and getting an acknowledgement +syncLimit=5 + +# the directory where the snapshot is stored. +# dataDir=/opt/zookeeper/data +# NOTE: Solr defaults the dataDir to <solrHome>/zoo_data + +# the port at which the clients will connect +# clientPort=2181 +# NOTE: Solr sets this based on zkRun / zkHost params + diff --git a/zookeeper/options b/zookeeper/options new file mode 100644 index 0000000..f31b9ec --- /dev/null +++ b/zookeeper/options @@ -0,0 +1 @@ +OPTIONS=" -Dbootstrap_confdir=./solr/collection1/conf -Dcollection.configName=myconf -DzkRun -DzkHost=opencontent-solr.index:9983 -DnumShards=2 " \ No newline at end of file diff --git a/zookeeper/options_2 b/zookeeper/options_2 new file mode 100644 index 0000000..bb3930f --- /dev/null +++ b/zookeeper/options_2 @@ -0,0 +1,4 @@ +OPTIONS="-Djetty.port=7500 -DzkHost=opencontent-solr.index:9983" +NAME=second +PID_FILE="./${NAME}.pid" +LOG_FILE="./${NAME}.log" diff --git a/zookeeper/resources/log4j.properties b/zookeeper/resources/log4j.properties new file mode 100644 index 0000000..f33fa71 --- /dev/null +++ b/zookeeper/resources/log4j.properties @@ -0,0 +1,24 @@ +# Logging level +solr.log=logs/ +log4j.rootLogger=INFO, file, CONSOLE + +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x \u2013 %m%n + +#- size rotation with log cleanup. +log4j.appender.file=org.apache.log4j.RollingFileAppender +log4j.appender.file.MaxFileSize=4MB +log4j.appender.file.MaxBackupIndex=9 + +#- File to log to and log format +log4j.appender.file.File=${solr.log}/solr.log +log4j.appender.file.layout=org.apache.log4j.PatternLayout +log4j.appender.file.layout.ConversionPattern=%-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %C; %m\n + +log4j.logger.org.apache.zookeeper=WARN +log4j.logger.org.apache.hadoop=WARN + +# set to INFO to enable infostream log messages +log4j.logger.org.apache.solr.update.LoggingInfoStream=OFF diff --git a/zookeeper/solr/README.txt b/zookeeper/solr/README.txt new file mode 100644 index 0000000..64d7c41 --- /dev/null +++ b/zookeeper/solr/README.txt @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +Example Solr Home Directory +============================= + +This directory is provided as an example of what a "Solr Home" directory +should look like. 
+ +It's not strictly necessary that you copy all of the files in this +directory when setting up a new instance of Solr, but it is recommended. + + +Basic Directory Structure +------------------------- + +The Solr Home directory typically contains the following... + +* solr.xml * + +This is the primary configuration file Solr looks for when starting. +This file specifies the list of "SolrCores" it should load, and +high-level configuration options that should be used for all SolrCores. + +Please see the comments in ./solr.xml for more details. + +If no solr.xml file is found, then Solr assumes that there should be +a single SolrCore named "collection1" and that the "Instance Directory" +for collection1 should be the same as the Solr Home Directory. + +* Individual SolrCore Instance Directories * + +Although solr.xml can be configured to look for SolrCore Instance Directories +in any path, simple sub-directories of the Solr Home Dir using relative paths +are common for many installations. In this directory you can see the +"./collection1" Instance Directory. + +* A Shared 'lib' Directory * + +Although solr.xml can be configured with an optional "sharedLib" attribute +that can point to any path, it is common to use a "./lib" sub-directory of the +Solr Home Directory. + +* ZooKeeper Files * + +When running SolrCloud with Solr's embedded ZooKeeper option, it is +common to have a "zoo.cfg" file and a "zoo_data" directory in the Solr Home +Directory. Please see the SolrCloud wiki page for more details... + +https://wiki.apache.org/solr/SolrCloud diff --git a/zookeeper/solr/collection1/README.txt b/zookeeper/solr/collection1/README.txt new file mode 100644 index 0000000..337d55b --- /dev/null +++ b/zookeeper/solr/collection1/README.txt @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +Example SolrCore Instance Directory +============================= + +This directory is provided as an example of what an "Instance Directory" +should look like for a SolrCore. + +It's not strictly necessary that you copy all of the files in this +directory when setting up a new SolrCore, but it is recommended. + + +Basic Directory Structure +------------------------- + +The Instance Directory typically contains the following sub-directories... + + conf/ + This directory is mandatory and must contain your solrconfig.xml + and schema.xml. Any other optional configuration files would also + be kept here. + + data/ + This directory is the default location where Solr will keep your + index, and is used by the replication scripts for dealing with + snapshots. You can override this location in the + conf/solrconfig.xml. Solr will create this directory if it does not + already exist. + + lib/ + This directory is optional. If it exists, Solr will load any Jars + found in this directory and use them to resolve any "plugins" + specified in your solrconfig.xml or schema.xml (i.e., Analyzers, + Request Handlers, etc...). Alternatively you can use the <lib> + syntax in conf/solrconfig.xml to direct Solr to your plugins. See + the example conf/solrconfig.xml file for details.
diff --git a/zookeeper/solr/collection1/conf/admin-extra.html b/zookeeper/solr/collection1/conf/admin-extra.html new file mode 100644 index 0000000..fecab20 --- /dev/null +++ b/zookeeper/solr/collection1/conf/admin-extra.html @@ -0,0 +1,24 @@ + + + diff --git a/zookeeper/solr/collection1/conf/admin-extra.menu-bottom.html b/zookeeper/solr/collection1/conf/admin-extra.menu-bottom.html new file mode 100644 index 0000000..3359a46 --- /dev/null +++ b/zookeeper/solr/collection1/conf/admin-extra.menu-bottom.html @@ -0,0 +1,25 @@ + + + + diff --git a/zookeeper/solr/collection1/conf/admin-extra.menu-top.html b/zookeeper/solr/collection1/conf/admin-extra.menu-top.html new file mode 100644 index 0000000..0886cee --- /dev/null +++ b/zookeeper/solr/collection1/conf/admin-extra.menu-top.html @@ -0,0 +1,25 @@ + + + + diff --git a/zookeeper/solr/collection1/conf/currency.xml b/zookeeper/solr/collection1/conf/currency.xml new file mode 100644 index 0000000..3a9c58a --- /dev/null +++ b/zookeeper/solr/collection1/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/zookeeper/solr/collection1/conf/elevate.xml b/zookeeper/solr/collection1/conf/elevate.xml new file mode 100644 index 0000000..25d5ceb --- /dev/null +++ b/zookeeper/solr/collection1/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/zookeeper/solr/collection1/conf/lang/contractions_ca.txt b/zookeeper/solr/collection1/conf/lang/contractions_ca.txt new file mode 100644 index 0000000..307a85f --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/zookeeper/solr/collection1/conf/lang/contractions_fr.txt b/zookeeper/solr/collection1/conf/lang/contractions_fr.txt new file mode 100644 index 0000000..f1bba51 --- 
/dev/null +++ b/zookeeper/solr/collection1/conf/lang/contractions_fr.txt @@ -0,0 +1,15 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j +d +c +jusqu +quoiqu +lorsqu +puisqu diff --git a/zookeeper/solr/collection1/conf/lang/contractions_ga.txt b/zookeeper/solr/collection1/conf/lang/contractions_ga.txt new file mode 100644 index 0000000..9ebe7fa --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/zookeeper/solr/collection1/conf/lang/contractions_it.txt b/zookeeper/solr/collection1/conf/lang/contractions_it.txt new file mode 100644 index 0000000..cac0409 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/zookeeper/solr/collection1/conf/lang/hyphenations_ga.txt b/zookeeper/solr/collection1/conf/lang/hyphenations_ga.txt new file mode 100644 index 0000000..4d2642c --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/zookeeper/solr/collection1/conf/lang/stemdict_nl.txt b/zookeeper/solr/collection1/conf/lang/stemdict_nl.txt new file mode 100644 index 0000000..4410729 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git 
a/zookeeper/solr/collection1/conf/lang/stoptags_ja.txt b/zookeeper/solr/collection1/conf/lang/stoptags_ja.txt new file mode 100644 index 0000000..71b7508 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 
日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 0, 1, 2, 何, 数, å¹¾ +#名詞-数 +# +# noun-affix: noun affixes where the sub-classification is undefined +#名詞-非自立 +# +# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, +# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, +# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, +# わり, 割り, 割, ん-口語/, もん-口語/ +#名詞-非自立-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. 
あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, +# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, +# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, +# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, +# 儘, ä¾­, みぎり, 矢先 +#名詞-非自立-副詞可能 +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よう(だ) ("you(da)"). +# e.g. よう, やう, 様 (よう) +#名詞-非自立-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form な (aux "da"). +# e.g. みたい, ふう +#名詞-非自立-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#名詞-特殊 +# +# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. そう +#名詞-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#名詞-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, +# よう, (でき)っこ, 感, 観, 性, å­¦, 類, 面, 用 +#名詞-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. 君, 様, 著 +#名詞-接尾-人名 +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#名詞-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 
化, 視, 分け, 入り, 落ち, 買い +#名詞-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. そう +#名詞-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula だ ("da"). +# e.g. 的, げ, がち +#名詞-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) +#名詞-接尾-副詞可能 +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 +#名詞-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽し) さ, (考え) 方 +#名詞-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) +#名詞-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are +# semantically verb-like. +# e.g. ごらん, ご覧, 御覧, 頂戴 +#名詞-動詞非自立的 +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") +# is いわく ("iwaku"). +#名詞-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and +# behave like an adjective. +# e.g. 申し訳, 仕方, とんでも, 違い +#名詞-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. 
お (æ°´), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) +#接頭詞-名詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by なる/なさる/くださる. +# e.g. お (読みなさい), お (座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. お (寒いですねえ), バカ (でかい) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 約, およそ, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#動詞 +# +# verb-main: +#動詞-自立 +# +# verb-auxiliary: +#動詞-非自立 +# +# verb-suffix: +#動詞-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-非自立 +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. あいかわらず, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, +# な, する, だ, etc. +# e.g. こんなに, そんなに, あんなに, なにか, なんでも +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, +# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, +# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. が, けれども, そして, じゃあ, それどころか +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. から, が, で, と, に, へ, より, を, の, にて +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. 
という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, +# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, +# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, +# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, +# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, +# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, +# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, +# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, +# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, +# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. こそ, さえ, しか, すら, は, も, ぞ +助詞-係助詞 +# +# particle-adverbial: +# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, +# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, +# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, +# (å½¼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, +# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (松島) や +助詞-間投助詞 +# +# particle-coordinate: +# e.g. と, たり, だの, だり, とか, なり, や, やら +助詞-並立助詞 +# +# particle-final: +# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, +# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 +# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 +# 「(祈りが届いたせい) か (, 試験に合格した.)」 +# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 +# e.g. か +助詞-副助詞/並立助詞/終助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. 
+助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. に, と +助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, +# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい +#感動詞 +# +##### +# symbol: unclassified Symbols. +記号 +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [○◎@$〒→+] +記号-一般 +# +# symbol-comma: Commas +# e.g. [,、] +記号-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記号-句点 +# +# symbol-space: Full-width whitespace. +記号-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『【] +記号-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’”』」】] +記号-括弧閉 +# +# symbol-alphabetic: +#記号-アルファベット +# +##### +# other: unclassified other +#その他 +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (だ)ァ +その他-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. あの, うんと, えと +フィラー +# +##### +# non-verbal: non-verbal sound. +非言語音 +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_ar.txt b/zookeeper/solr/collection1/conf/lang/stopwords_ar.txt new file mode 100644 index 0000000..046829d --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both Ø£ and ا +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +Ø£ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_bg.txt b/zookeeper/solr/collection1/conf/lang/stopwords_bg.txt new file mode 100644 index 0000000..1ae4ba2 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_ca.txt b/zookeeper/solr/collection1/conf/lang/stopwords_ca.txt new file mode 100644 index 0000000..3da65de --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta 
+està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_cz.txt b/zookeeper/solr/collection1/conf/lang/stopwords_cz.txt new file mode 100644 index 0000000..53c6097 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeš +budem +byli +jseš +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naši +napište +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +však +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +ještě +až +bez +také +pouze +první +vaše +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +při +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp
+jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne +jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_da.txt b/zookeeper/solr/collection1/conf/lang/stopwords_da.txt new file mode 100644 index 0000000..a3ff5fe --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +pÃ¥ | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +nÃ¥r | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +ogsÃ¥ | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sÃ¥dan | such, like this/like that diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_de.txt b/zookeeper/solr/collection1/conf/lang/stopwords_de.txt new file mode 100644 index 0000000..f770384 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_el.txt b/zookeeper/solr/collection1/conf/lang/stopwords_el.txt new file mode 100644 index 0000000..232681f --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# 
Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_en.txt b/zookeeper/solr/collection1/conf/lang/stopwords_en.txt new file mode 100644 index 0000000..2c164c0 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_es.txt b/zookeeper/solr/collection1/conf/lang/stopwords_es.txt new file mode 100644 index 0000000..2db1476 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_eu.txt b/zookeeper/solr/collection1/conf/lang/stopwords_eu.txt new file mode 100644 index 0000000..25f1db9 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_fa.txt b/zookeeper/solr/collection1/conf/lang/stopwords_fa.txt new file mode 100644 index 0000000..723641c --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by 
Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ي' instead of 'ی' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها 
+شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_fi.txt b/zookeeper/solr/collection1/conf/lang/stopwords_fi.txt new file mode 100644 index 0000000..addad79 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä 
keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_fr.txt b/zookeeper/solr/collection1/conf/lang/stopwords_fr.txt new file mode 100644 index 0000000..20d12cb --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_fr.txt @@ -0,0 +1,184 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +cela | that +celà | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_ga.txt b/zookeeper/solr/collection1/conf/lang/stopwords_ga.txt new file mode 100644 index 0000000..9ff88d7 --- 
/dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_gl.txt b/zookeeper/solr/collection1/conf/lang/stopwords_gl.txt new file mode 100644 index 0000000..d8760b1 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_hi.txt 
b/zookeeper/solr/collection1/conf/lang/stopwords_hi.txt new file mode 100644 index 0000000..86286bb --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा 
+नहिं diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_hu.txt b/zookeeper/solr/collection1/conf/lang/stopwords_hu.txt new file mode 100644 index 0000000..1a96f1d --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. 
+ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_hy.txt b/zookeeper/solr/collection1/conf/lang/stopwords_hy.txt new file mode 100644 index 0000000..60c1c50 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. 
+Õ¡ÕµÕ¤ +Õ¡ÕµÕ¬ +Õ¡ÕµÕ¶ +Õ¡ÕµÕ½ +դու +դուք +Õ¥Õ´ +Õ¥Õ¶ +ենք +Õ¥Õ½ +եք +Õ§ +Õ§Õ« +Õ§Õ«Õ¶ +էինք +էիր +էիք +էր +Õ¨Õ½Õ¿ +Õ© +Õ« +Õ«Õ¶ +Õ«Õ½Õ¯ +իր +Õ¯Õ¡Õ´ +համար +Õ°Õ¥Õ¿ +Õ°Õ¥Õ¿Õ¸ +մենք +Õ´Õ¥Õ» +Õ´Õ« +Õ¶ +Õ¶Õ¡ +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +ÕºÕ«Õ¿Õ« +վրա +և diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_id.txt b/zookeeper/solr/collection1/conf/lang/stopwords_id.txt new file mode 100644 index 0000000..4617f83 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun 
+dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_it.txt b/zookeeper/solr/collection1/conf/lang/stopwords_it.txt new file mode 100644 index 0000000..4cb5b08 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From 
svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | 
+questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_ja.txt b/zookeeper/solr/collection1/conf/lang/stopwords_ja.txt new file mode 100644 index 0000000..d4321be --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. 
+# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. +# +の +に +は +を +た +が +で +て +と +し +れ +さ +ある +いる +も +する +から +な +こと +として +い +や +れる +など +なっ +ない +この +ため +その +あっ +よう +また +もの +という +あり +まで +られ +なる +へ +か +だ +これ +によって +により +おり +より +による +ず +なり +られる +において +ば +なかっ +なく +しかし +について +せ +だっ +その後 +できる +それ +う +ので +なお +のみ +でき +き +つ +における +および +いう +さらに +でも +ら +たり +その他 +に関する +たち +ます +ん +なら +に対して +特に +せる +及び +これら +とき +では +にて +ほか +ながら +うち +そして +とともに +ただし +かつて +それぞれ +または +お +ほど +ものの +に対する +ほとんど +と共に +といった +です +とも +ところ +ここ +##### End of file diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_lv.txt b/zookeeper/solr/collection1/conf/lang/stopwords_lv.txt new file mode 100644 index 0000000..e21a23c --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakÅ¡ +ārpus +augÅ¡pus +bez +caur +dēļ +gar +iekÅ¡ +iz +kopÅ¡ +labad +lejpus +lÄ«dz +no +otrpus +pa +par +pār 
+pēc +pie +pirms +pret +priekš +starp +šaipus +uz +viņpus +virs +virspus +zem +apakšpus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretī +arī +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droši +diemžēl +nebūt +ik +it +taču +nu +pat +tiklab +iekšpus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolīdz +līdzko +tiklīdz +jebšu +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +būt +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +būšu +būsi +būs +būsim +būsiet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikšu +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapšu +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām +kļuvāt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varēt +varēju +varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_nl.txt b/zookeeper/solr/collection1/conf/lang/stopwords_nl.txt new file mode 100644 index 0000000..f4d61f5 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. 
+ +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. 
of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_no.txt b/zookeeper/solr/collection1/conf/lang/stopwords_no.txt new file mode 100644 index 0000000..e76f36e --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) +en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_pt.txt b/zookeeper/solr/collection1/conf/lang/stopwords_pt.txt new file mode 100644 index 0000000..276c1b4 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_ro.txt b/zookeeper/solr/collection1/conf/lang/stopwords_ro.txt new file mode 100644 index 0000000..4fdee90 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_ru.txt b/zookeeper/solr/collection1/conf/lang/stopwords_ru.txt new file mode 100644 index 0000000..6430769 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. 
suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. 
form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_sv.txt b/zookeeper/solr/collection1/conf/lang/stopwords_sv.txt new file mode 100644 index 0000000..22bddfd --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. 
+ | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our +blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | 
whose +vÃ¥rt | our +vÃ¥ra | our +ert | your +era | your +vilkas | whose + diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_th.txt b/zookeeper/solr/collection1/conf/lang/stopwords_th.txt new file mode 100644 index 0000000..07f0fab --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +แห่ง +แล้ว +และ +แรก +แบบ +แต่ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นการ +เป็น +เปิดเผย +เปิด +เนื่องจาก +เดียวกัน +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีก +อาจ +อะไร +ออก +อย่าง +อยู่ +อยาก +หาก +หลาย +หลังจาก +หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สําหรับ +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาก +มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นํา +นั้น +นัก +นอกจาก +ทุก +ที่สุด +ที่ +ทําให้ +ทํา +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูก +ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งแต่ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาก +จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +ก่อน +ก็ +การ +กับ +กัน +กว่า +กล่าว diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_tr.txt b/zookeeper/solr/collection1/conf/lang/stopwords_tr.txt new file mode 100644 index 0000000..84d9408 --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla 
+dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/zookeeper/solr/collection1/conf/lang/userdict_ja.txt b/zookeeper/solr/collection1/conf/lang/userdict_ja.txt new file mode 100644 index 0000000..6f0368e --- /dev/null +++ b/zookeeper/solr/collection1/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> +# +# Notice that a single half-width space separates tokens and readings, and +# that the number of tokens and readings must match exactly. +# +# Also notice that multiple entries with the same <text> is undefined. 
+# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/zookeeper/solr/collection1/conf/mapping-FoldToASCII.txt b/zookeeper/solr/collection1/conf/mapping-FoldToASCII.txt new file mode 100644 index 0000000..9a84b6e --- /dev/null +++ b/zookeeper/solr/collection1/conf/mapping-FoldToASCII.txt @@ -0,0 +1,3813 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# This map converts alphabetic, numeric, and symbolic Unicode characters +# which are not in the first 127 ASCII characters (the "Basic Latin" Unicode +# block) into their ASCII equivalents, if one exists. 
+# +# Characters from the following Unicode blocks are converted; however, only +# those characters with reasonable ASCII alternatives are converted: +# +# - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf +# - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf +# - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf +# - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf +# - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf +# - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf +# - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf +# - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf +# - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf +# - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf +# - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf +# - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf +# - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf +# - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf +# - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf +# - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf +# +# See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode +# +# The set of character conversions supported by this map is a superset of +# those supported by the map represented by mapping-ISOLatin1Accent.txt. +# +# See the bottom of this file for the Perl script used to generate the contents +# of this file (without this header) from ASCIIFoldingFilter.java. + + +# Syntax: +# "source" => "target" +# "source".length() > 0 (source cannot be empty.) +# "target".length() >= 0 (target can be empty.) 
+ + +# À [LATIN CAPITAL LETTER A WITH GRAVE] +"\u00C0" => "A" + +# Á [LATIN CAPITAL LETTER A WITH ACUTE] +"\u00C1" => "A" + +#  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] +"\u00C2" => "A" + +# à [LATIN CAPITAL LETTER A WITH TILDE] +"\u00C3" => "A" + +# Ä [LATIN CAPITAL LETTER A WITH DIAERESIS] +"\u00C4" => "A" + +# Å [LATIN CAPITAL LETTER A WITH RING ABOVE] +"\u00C5" => "A" + +# Ā [LATIN CAPITAL LETTER A WITH MACRON] +"\u0100" => "A" + +# Ă [LATIN CAPITAL LETTER A WITH BREVE] +"\u0102" => "A" + +# Ą [LATIN CAPITAL LETTER A WITH OGONEK] +"\u0104" => "A" + +# Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA] +"\u018F" => "A" + +# Ǎ [LATIN CAPITAL LETTER A WITH CARON] +"\u01CD" => "A" + +# Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] +"\u01DE" => "A" + +# Ç  [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] +"\u01E0" => "A" + +# Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] +"\u01FA" => "A" + +# Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] +"\u0200" => "A" + +# Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE] +"\u0202" => "A" + +# Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE] +"\u0226" => "A" + +# Ⱥ [LATIN CAPITAL LETTER A WITH STROKE] +"\u023A" => "A" + +# ᴀ [LATIN LETTER SMALL CAPITAL A] +"\u1D00" => "A" + +# Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW] +"\u1E00" => "A" + +# Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW] +"\u1EA0" => "A" + +# Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE] +"\u1EA2" => "A" + +# Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] +"\u1EA4" => "A" + +# Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] +"\u1EA6" => "A" + +# Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] +"\u1EA8" => "A" + +# Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] +"\u1EAA" => "A" + +# Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] +"\u1EAC" => "A" + +# Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] +"\u1EAE" => "A" + +# Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] +"\u1EB0" => "A" + +# Ẳ [LATIN CAPITAL LETTER A 
WITH BREVE AND HOOK ABOVE] +"\u1EB2" => "A" + +# Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] +"\u1EB4" => "A" + +# Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] +"\u1EB6" => "A" + +# Ⓐ [CIRCLED LATIN CAPITAL LETTER A] +"\u24B6" => "A" + +# A [FULLWIDTH LATIN CAPITAL LETTER A] +"\uFF21" => "A" + +# à [LATIN SMALL LETTER A WITH GRAVE] +"\u00E0" => "a" + +# á [LATIN SMALL LETTER A WITH ACUTE] +"\u00E1" => "a" + +# â [LATIN SMALL LETTER A WITH CIRCUMFLEX] +"\u00E2" => "a" + +# ã [LATIN SMALL LETTER A WITH TILDE] +"\u00E3" => "a" + +# ä [LATIN SMALL LETTER A WITH DIAERESIS] +"\u00E4" => "a" + +# Ã¥ [LATIN SMALL LETTER A WITH RING ABOVE] +"\u00E5" => "a" + +# ā [LATIN SMALL LETTER A WITH MACRON] +"\u0101" => "a" + +# ă [LATIN SMALL LETTER A WITH BREVE] +"\u0103" => "a" + +# ą [LATIN SMALL LETTER A WITH OGONEK] +"\u0105" => "a" + +# ǎ [LATIN SMALL LETTER A WITH CARON] +"\u01CE" => "a" + +# ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] +"\u01DF" => "a" + +# Ç¡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] +"\u01E1" => "a" + +# Ç» [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] +"\u01FB" => "a" + +# ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE] +"\u0201" => "a" + +# ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE] +"\u0203" => "a" + +# ȧ [LATIN SMALL LETTER A WITH DOT ABOVE] +"\u0227" => "a" + +# ɐ [LATIN SMALL LETTER TURNED A] +"\u0250" => "a" + +# ə [LATIN SMALL LETTER SCHWA] +"\u0259" => "a" + +# ɚ [LATIN SMALL LETTER SCHWA WITH HOOK] +"\u025A" => "a" + +# ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK] +"\u1D8F" => "a" + +# ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] +"\u1D95" => "a" + +# ạ [LATIN SMALL LETTER A WITH RING BELOW] +"\u1E01" => "a" + +# ả [LATIN SMALL LETTER A WITH RIGHT HALF RING] +"\u1E9A" => "a" + +# ạ [LATIN SMALL LETTER A WITH DOT BELOW] +"\u1EA1" => "a" + +# ả [LATIN SMALL LETTER A WITH HOOK ABOVE] +"\u1EA3" => "a" + +# ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] +"\u1EA5" => "a" + +# ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND 
GRAVE] +"\u1EA7" => "a" + +# ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] +"\u1EA9" => "a" + +# ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] +"\u1EAB" => "a" + +# ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] +"\u1EAD" => "a" + +# ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE] +"\u1EAF" => "a" + +# ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE] +"\u1EB1" => "a" + +# ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] +"\u1EB3" => "a" + +# ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE] +"\u1EB5" => "a" + +# ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] +"\u1EB7" => "a" + +# ₐ [LATIN SUBSCRIPT SMALL LETTER A] +"\u2090" => "a" + +# ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA] +"\u2094" => "a" + +# ⓐ [CIRCLED LATIN SMALL LETTER A] +"\u24D0" => "a" + +# â±¥ [LATIN SMALL LETTER A WITH STROKE] +"\u2C65" => "a" + +# Ɐ [LATIN CAPITAL LETTER TURNED A] +"\u2C6F" => "a" + +# a [FULLWIDTH LATIN SMALL LETTER A] +"\uFF41" => "a" + +# Ꜳ [LATIN CAPITAL LETTER AA] +"\uA732" => "AA" + +# Æ [LATIN CAPITAL LETTER AE] +"\u00C6" => "AE" + +# Ç¢ [LATIN CAPITAL LETTER AE WITH MACRON] +"\u01E2" => "AE" + +# Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE] +"\u01FC" => "AE" + +# ᴁ [LATIN LETTER SMALL CAPITAL AE] +"\u1D01" => "AE" + +# Ꜵ [LATIN CAPITAL LETTER AO] +"\uA734" => "AO" + +# Ꜷ [LATIN CAPITAL LETTER AU] +"\uA736" => "AU" + +# Ꜹ [LATIN CAPITAL LETTER AV] +"\uA738" => "AV" + +# Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] +"\uA73A" => "AV" + +# Ꜽ [LATIN CAPITAL LETTER AY] +"\uA73C" => "AY" + +# ⒜ [PARENTHESIZED LATIN SMALL LETTER A] +"\u249C" => "(a)" + +# ꜳ [LATIN SMALL LETTER AA] +"\uA733" => "aa" + +# æ [LATIN SMALL LETTER AE] +"\u00E6" => "ae" + +# Ç£ [LATIN SMALL LETTER AE WITH MACRON] +"\u01E3" => "ae" + +# ǽ [LATIN SMALL LETTER AE WITH ACUTE] +"\u01FD" => "ae" + +# ᴂ [LATIN SMALL LETTER TURNED AE] +"\u1D02" => "ae" + +# ꜵ [LATIN SMALL LETTER AO] +"\uA735" => "ao" + +# ꜷ [LATIN SMALL LETTER AU] +"\uA737" => "au" + +# ꜹ [LATIN SMALL LETTER AV] +"\uA739" => "av" + +# 
ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] +"\uA73B" => "av" + +# ꜽ [LATIN SMALL LETTER AY] +"\uA73D" => "ay" + +# Ɓ [LATIN CAPITAL LETTER B WITH HOOK] +"\u0181" => "B" + +# Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR] +"\u0182" => "B" + +# Ƀ [LATIN CAPITAL LETTER B WITH STROKE] +"\u0243" => "B" + +# ʙ [LATIN LETTER SMALL CAPITAL B] +"\u0299" => "B" + +# ᴃ [LATIN LETTER SMALL CAPITAL BARRED B] +"\u1D03" => "B" + +# Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE] +"\u1E02" => "B" + +# Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW] +"\u1E04" => "B" + +# Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW] +"\u1E06" => "B" + +# Ⓑ [CIRCLED LATIN CAPITAL LETTER B] +"\u24B7" => "B" + +# ï¼¢ [FULLWIDTH LATIN CAPITAL LETTER B] +"\uFF22" => "B" + +# ƀ [LATIN SMALL LETTER B WITH STROKE] +"\u0180" => "b" + +# ƃ [LATIN SMALL LETTER B WITH TOPBAR] +"\u0183" => "b" + +# ɓ [LATIN SMALL LETTER B WITH HOOK] +"\u0253" => "b" + +# ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE] +"\u1D6C" => "b" + +# ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK] +"\u1D80" => "b" + +# ḃ [LATIN SMALL LETTER B WITH DOT ABOVE] +"\u1E03" => "b" + +# ḅ [LATIN SMALL LETTER B WITH DOT BELOW] +"\u1E05" => "b" + +# ḇ [LATIN SMALL LETTER B WITH LINE BELOW] +"\u1E07" => "b" + +# ⓑ [CIRCLED LATIN SMALL LETTER B] +"\u24D1" => "b" + +# b [FULLWIDTH LATIN SMALL LETTER B] +"\uFF42" => "b" + +# ⒝ [PARENTHESIZED LATIN SMALL LETTER B] +"\u249D" => "(b)" + +# Ç [LATIN CAPITAL LETTER C WITH CEDILLA] +"\u00C7" => "C" + +# Ć [LATIN CAPITAL LETTER C WITH ACUTE] +"\u0106" => "C" + +# Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] +"\u0108" => "C" + +# Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE] +"\u010A" => "C" + +# Č [LATIN CAPITAL LETTER C WITH CARON] +"\u010C" => "C" + +# Ƈ [LATIN CAPITAL LETTER C WITH HOOK] +"\u0187" => "C" + +# È» [LATIN CAPITAL LETTER C WITH STROKE] +"\u023B" => "C" + +# ʗ [LATIN LETTER STRETCHED C] +"\u0297" => "C" + +# ᴄ [LATIN LETTER SMALL CAPITAL C] +"\u1D04" => "C" + +# Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] +"\u1E08" => "C" 
+ +# Ⓒ [CIRCLED LATIN CAPITAL LETTER C] +"\u24B8" => "C" + +# ï¼£ [FULLWIDTH LATIN CAPITAL LETTER C] +"\uFF23" => "C" + +# ç [LATIN SMALL LETTER C WITH CEDILLA] +"\u00E7" => "c" + +# ć [LATIN SMALL LETTER C WITH ACUTE] +"\u0107" => "c" + +# ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX] +"\u0109" => "c" + +# ċ [LATIN SMALL LETTER C WITH DOT ABOVE] +"\u010B" => "c" + +# č [LATIN SMALL LETTER C WITH CARON] +"\u010D" => "c" + +# ƈ [LATIN SMALL LETTER C WITH HOOK] +"\u0188" => "c" + +# ȼ [LATIN SMALL LETTER C WITH STROKE] +"\u023C" => "c" + +# ɕ [LATIN SMALL LETTER C WITH CURL] +"\u0255" => "c" + +# ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] +"\u1E09" => "c" + +# ↄ [LATIN SMALL LETTER REVERSED C] +"\u2184" => "c" + +# ⓒ [CIRCLED LATIN SMALL LETTER C] +"\u24D2" => "c" + +# Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT] +"\uA73E" => "c" + +# ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT] +"\uA73F" => "c" + +# c [FULLWIDTH LATIN SMALL LETTER C] +"\uFF43" => "c" + +# ⒞ [PARENTHESIZED LATIN SMALL LETTER C] +"\u249E" => "(c)" + +# Ð [LATIN CAPITAL LETTER ETH] +"\u00D0" => "D" + +# Ď [LATIN CAPITAL LETTER D WITH CARON] +"\u010E" => "D" + +# Đ [LATIN CAPITAL LETTER D WITH STROKE] +"\u0110" => "D" + +# Ɖ [LATIN CAPITAL LETTER AFRICAN D] +"\u0189" => "D" + +# Ɗ [LATIN CAPITAL LETTER D WITH HOOK] +"\u018A" => "D" + +# Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR] +"\u018B" => "D" + +# ᴅ [LATIN LETTER SMALL CAPITAL D] +"\u1D05" => "D" + +# ᴆ [LATIN LETTER SMALL CAPITAL ETH] +"\u1D06" => "D" + +# Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE] +"\u1E0A" => "D" + +# Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW] +"\u1E0C" => "D" + +# Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW] +"\u1E0E" => "D" + +# Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA] +"\u1E10" => "D" + +# Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] +"\u1E12" => "D" + +# Ⓓ [CIRCLED LATIN CAPITAL LETTER D] +"\u24B9" => "D" + +# Ꝺ [LATIN CAPITAL LETTER INSULAR D] +"\uA779" => "D" + +# D [FULLWIDTH LATIN CAPITAL LETTER D] +"\uFF24" => "D" + +# ð [LATIN 
SMALL LETTER ETH] +"\u00F0" => "d" + +# ď [LATIN SMALL LETTER D WITH CARON] +"\u010F" => "d" + +# đ [LATIN SMALL LETTER D WITH STROKE] +"\u0111" => "d" + +# ƌ [LATIN SMALL LETTER D WITH TOPBAR] +"\u018C" => "d" + +# È¡ [LATIN SMALL LETTER D WITH CURL] +"\u0221" => "d" + +# ɖ [LATIN SMALL LETTER D WITH TAIL] +"\u0256" => "d" + +# ɗ [LATIN SMALL LETTER D WITH HOOK] +"\u0257" => "d" + +# áµ­ [LATIN SMALL LETTER D WITH MIDDLE TILDE] +"\u1D6D" => "d" + +# ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK] +"\u1D81" => "d" + +# ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL] +"\u1D91" => "d" + +# ḋ [LATIN SMALL LETTER D WITH DOT ABOVE] +"\u1E0B" => "d" + +# ḍ [LATIN SMALL LETTER D WITH DOT BELOW] +"\u1E0D" => "d" + +# ḏ [LATIN SMALL LETTER D WITH LINE BELOW] +"\u1E0F" => "d" + +# ḑ [LATIN SMALL LETTER D WITH CEDILLA] +"\u1E11" => "d" + +# ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] +"\u1E13" => "d" + +# ⓓ [CIRCLED LATIN SMALL LETTER D] +"\u24D3" => "d" + +# ꝺ [LATIN SMALL LETTER INSULAR D] +"\uA77A" => "d" + +# d [FULLWIDTH LATIN SMALL LETTER D] +"\uFF44" => "d" + +# DŽ [LATIN CAPITAL LETTER DZ WITH CARON] +"\u01C4" => "DZ" + +# DZ [LATIN CAPITAL LETTER DZ] +"\u01F1" => "DZ" + +# Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] +"\u01C5" => "Dz" + +# Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] +"\u01F2" => "Dz" + +# ⒟ [PARENTHESIZED LATIN SMALL LETTER D] +"\u249F" => "(d)" + +# ȸ [LATIN SMALL LETTER DB DIGRAPH] +"\u0238" => "db" + +# dž [LATIN SMALL LETTER DZ WITH CARON] +"\u01C6" => "dz" + +# dz [LATIN SMALL LETTER DZ] +"\u01F3" => "dz" + +# Ê£ [LATIN SMALL LETTER DZ DIGRAPH] +"\u02A3" => "dz" + +# Ê¥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] +"\u02A5" => "dz" + +# È [LATIN CAPITAL LETTER E WITH GRAVE] +"\u00C8" => "E" + +# É [LATIN CAPITAL LETTER E WITH ACUTE] +"\u00C9" => "E" + +# Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] +"\u00CA" => "E" + +# Ë [LATIN CAPITAL LETTER E WITH DIAERESIS] +"\u00CB" => "E" + +# Ē [LATIN CAPITAL LETTER E WITH MACRON] +"\u0112" => "E" 
+ +# Ĕ [LATIN CAPITAL LETTER E WITH BREVE] +"\u0114" => "E" + +# Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE] +"\u0116" => "E" + +# Ę [LATIN CAPITAL LETTER E WITH OGONEK] +"\u0118" => "E" + +# Ě [LATIN CAPITAL LETTER E WITH CARON] +"\u011A" => "E" + +# Ǝ [LATIN CAPITAL LETTER REVERSED E] +"\u018E" => "E" + +# Ɛ [LATIN CAPITAL LETTER OPEN E] +"\u0190" => "E" + +# Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] +"\u0204" => "E" + +# Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE] +"\u0206" => "E" + +# Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA] +"\u0228" => "E" + +# Ɇ [LATIN CAPITAL LETTER E WITH STROKE] +"\u0246" => "E" + +# ᴇ [LATIN LETTER SMALL CAPITAL E] +"\u1D07" => "E" + +# Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] +"\u1E14" => "E" + +# Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] +"\u1E16" => "E" + +# Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] +"\u1E18" => "E" + +# Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW] +"\u1E1A" => "E" + +# Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] +"\u1E1C" => "E" + +# Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW] +"\u1EB8" => "E" + +# Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE] +"\u1EBA" => "E" + +# Ẽ [LATIN CAPITAL LETTER E WITH TILDE] +"\u1EBC" => "E" + +# Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] +"\u1EBE" => "E" + +# Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] +"\u1EC0" => "E" + +# Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] +"\u1EC2" => "E" + +# Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] +"\u1EC4" => "E" + +# Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] +"\u1EC6" => "E" + +# Ⓔ [CIRCLED LATIN CAPITAL LETTER E] +"\u24BA" => "E" + +# â±» [LATIN LETTER SMALL CAPITAL TURNED E] +"\u2C7B" => "E" + +# ï¼¥ [FULLWIDTH LATIN CAPITAL LETTER E] +"\uFF25" => "E" + +# è [LATIN SMALL LETTER E WITH GRAVE] +"\u00E8" => "e" + +# é [LATIN SMALL LETTER E WITH ACUTE] +"\u00E9" => "e" + +# ê [LATIN SMALL LETTER E WITH CIRCUMFLEX] +"\u00EA" => "e" + +# ë [LATIN SMALL LETTER E WITH 
DIAERESIS] +"\u00EB" => "e" + +# ē [LATIN SMALL LETTER E WITH MACRON] +"\u0113" => "e" + +# ĕ [LATIN SMALL LETTER E WITH BREVE] +"\u0115" => "e" + +# ė [LATIN SMALL LETTER E WITH DOT ABOVE] +"\u0117" => "e" + +# ę [LATIN SMALL LETTER E WITH OGONEK] +"\u0119" => "e" + +# ě [LATIN SMALL LETTER E WITH CARON] +"\u011B" => "e" + +# ǝ [LATIN SMALL LETTER TURNED E] +"\u01DD" => "e" + +# ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE] +"\u0205" => "e" + +# ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE] +"\u0207" => "e" + +# È© [LATIN SMALL LETTER E WITH CEDILLA] +"\u0229" => "e" + +# ɇ [LATIN SMALL LETTER E WITH STROKE] +"\u0247" => "e" + +# ɘ [LATIN SMALL LETTER REVERSED E] +"\u0258" => "e" + +# ɛ [LATIN SMALL LETTER OPEN E] +"\u025B" => "e" + +# ɜ [LATIN SMALL LETTER REVERSED OPEN E] +"\u025C" => "e" + +# ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] +"\u025D" => "e" + +# ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E] +"\u025E" => "e" + +# ʚ [LATIN SMALL LETTER CLOSED OPEN E] +"\u029A" => "e" + +# ᴈ [LATIN SMALL LETTER TURNED OPEN E] +"\u1D08" => "e" + +# ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK] +"\u1D92" => "e" + +# ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] +"\u1D93" => "e" + +# ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] +"\u1D94" => "e" + +# ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE] +"\u1E15" => "e" + +# ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE] +"\u1E17" => "e" + +# ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] +"\u1E19" => "e" + +# ḛ [LATIN SMALL LETTER E WITH TILDE BELOW] +"\u1E1B" => "e" + +# ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] +"\u1E1D" => "e" + +# ẹ [LATIN SMALL LETTER E WITH DOT BELOW] +"\u1EB9" => "e" + +# ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE] +"\u1EBB" => "e" + +# ẽ [LATIN SMALL LETTER E WITH TILDE] +"\u1EBD" => "e" + +# ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] +"\u1EBF" => "e" + +# ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] +"\u1EC1" => "e" + +# ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK 
ABOVE] +"\u1EC3" => "e" + +# ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] +"\u1EC5" => "e" + +# ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] +"\u1EC7" => "e" + +# ₑ [LATIN SUBSCRIPT SMALL LETTER E] +"\u2091" => "e" + +# ⓔ [CIRCLED LATIN SMALL LETTER E] +"\u24D4" => "e" + +# ⱸ [LATIN SMALL LETTER E WITH NOTCH] +"\u2C78" => "e" + +# e [FULLWIDTH LATIN SMALL LETTER E] +"\uFF45" => "e" + +# ⒠ [PARENTHESIZED LATIN SMALL LETTER E] +"\u24A0" => "(e)" + +# Ƒ [LATIN CAPITAL LETTER F WITH HOOK] +"\u0191" => "F" + +# Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE] +"\u1E1E" => "F" + +# Ⓕ [CIRCLED LATIN CAPITAL LETTER F] +"\u24BB" => "F" + +# ꜰ [LATIN LETTER SMALL CAPITAL F] +"\uA730" => "F" + +# Ꝼ [LATIN CAPITAL LETTER INSULAR F] +"\uA77B" => "F" + +# ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F] +"\uA7FB" => "F" + +# F [FULLWIDTH LATIN CAPITAL LETTER F] +"\uFF26" => "F" + +# ƒ [LATIN SMALL LETTER F WITH HOOK] +"\u0192" => "f" + +# áµ® [LATIN SMALL LETTER F WITH MIDDLE TILDE] +"\u1D6E" => "f" + +# ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK] +"\u1D82" => "f" + +# ḟ [LATIN SMALL LETTER F WITH DOT ABOVE] +"\u1E1F" => "f" + +# ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE] +"\u1E9B" => "f" + +# ⓕ [CIRCLED LATIN SMALL LETTER F] +"\u24D5" => "f" + +# ꝼ [LATIN SMALL LETTER INSULAR F] +"\uA77C" => "f" + +# f [FULLWIDTH LATIN SMALL LETTER F] +"\uFF46" => "f" + +# ⒡ [PARENTHESIZED LATIN SMALL LETTER F] +"\u24A1" => "(f)" + +# ff [LATIN SMALL LIGATURE FF] +"\uFB00" => "ff" + +# ffi [LATIN SMALL LIGATURE FFI] +"\uFB03" => "ffi" + +# ffl [LATIN SMALL LIGATURE FFL] +"\uFB04" => "ffl" + +# fi [LATIN SMALL LIGATURE FI] +"\uFB01" => "fi" + +# fl [LATIN SMALL LIGATURE FL] +"\uFB02" => "fl" + +# Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] +"\u011C" => "G" + +# Ğ [LATIN CAPITAL LETTER G WITH BREVE] +"\u011E" => "G" + +# Ä  [LATIN CAPITAL LETTER G WITH DOT ABOVE] +"\u0120" => "G" + +# Ä¢ [LATIN CAPITAL LETTER G WITH CEDILLA] +"\u0122" => "G" + +# Ɠ [LATIN CAPITAL LETTER G WITH HOOK] +"\u0193" => 
"G" + +# Ǥ [LATIN CAPITAL LETTER G WITH STROKE] +"\u01E4" => "G" + +# Ç¥ [LATIN SMALL LETTER G WITH STROKE] +"\u01E5" => "G" + +# Ǧ [LATIN CAPITAL LETTER G WITH CARON] +"\u01E6" => "G" + +# ǧ [LATIN SMALL LETTER G WITH CARON] +"\u01E7" => "G" + +# Ç´ [LATIN CAPITAL LETTER G WITH ACUTE] +"\u01F4" => "G" + +# É¢ [LATIN LETTER SMALL CAPITAL G] +"\u0262" => "G" + +# ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK] +"\u029B" => "G" + +# Ḡ [LATIN CAPITAL LETTER G WITH MACRON] +"\u1E20" => "G" + +# Ⓖ [CIRCLED LATIN CAPITAL LETTER G] +"\u24BC" => "G" + +# Ᵹ [LATIN CAPITAL LETTER INSULAR G] +"\uA77D" => "G" + +# Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G] +"\uA77E" => "G" + +# G [FULLWIDTH LATIN CAPITAL LETTER G] +"\uFF27" => "G" + +# ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX] +"\u011D" => "g" + +# ğ [LATIN SMALL LETTER G WITH BREVE] +"\u011F" => "g" + +# Ä¡ [LATIN SMALL LETTER G WITH DOT ABOVE] +"\u0121" => "g" + +# Ä£ [LATIN SMALL LETTER G WITH CEDILLA] +"\u0123" => "g" + +# ǵ [LATIN SMALL LETTER G WITH ACUTE] +"\u01F5" => "g" + +# É  [LATIN SMALL LETTER G WITH HOOK] +"\u0260" => "g" + +# É¡ [LATIN SMALL LETTER SCRIPT G] +"\u0261" => "g" + +# áµ· [LATIN SMALL LETTER TURNED G] +"\u1D77" => "g" + +# áµ¹ [LATIN SMALL LETTER INSULAR G] +"\u1D79" => "g" + +# ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK] +"\u1D83" => "g" + +# ḡ [LATIN SMALL LETTER G WITH MACRON] +"\u1E21" => "g" + +# ⓖ [CIRCLED LATIN SMALL LETTER G] +"\u24D6" => "g" + +# ꝿ [LATIN SMALL LETTER TURNED INSULAR G] +"\uA77F" => "g" + +# g [FULLWIDTH LATIN SMALL LETTER G] +"\uFF47" => "g" + +# ⒢ [PARENTHESIZED LATIN SMALL LETTER G] +"\u24A2" => "(g)" + +# Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] +"\u0124" => "H" + +# Ħ [LATIN CAPITAL LETTER H WITH STROKE] +"\u0126" => "H" + +# Ȟ [LATIN CAPITAL LETTER H WITH CARON] +"\u021E" => "H" + +# ʜ [LATIN LETTER SMALL CAPITAL H] +"\u029C" => "H" + +# Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE] +"\u1E22" => "H" + +# Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW] +"\u1E24" => "H" + +# Ḧ [LATIN 
CAPITAL LETTER H WITH DIAERESIS] +"\u1E26" => "H" + +# Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA] +"\u1E28" => "H" + +# Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW] +"\u1E2A" => "H" + +# Ⓗ [CIRCLED LATIN CAPITAL LETTER H] +"\u24BD" => "H" + +# Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER] +"\u2C67" => "H" + +# â±µ [LATIN CAPITAL LETTER HALF H] +"\u2C75" => "H" + +# H [FULLWIDTH LATIN CAPITAL LETTER H] +"\uFF28" => "H" + +# Ä¥ [LATIN SMALL LETTER H WITH CIRCUMFLEX] +"\u0125" => "h" + +# ħ [LATIN SMALL LETTER H WITH STROKE] +"\u0127" => "h" + +# ȟ [LATIN SMALL LETTER H WITH CARON] +"\u021F" => "h" + +# É¥ [LATIN SMALL LETTER TURNED H] +"\u0265" => "h" + +# ɦ [LATIN SMALL LETTER H WITH HOOK] +"\u0266" => "h" + +# Ê® [LATIN SMALL LETTER TURNED H WITH FISHHOOK] +"\u02AE" => "h" + +# ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] +"\u02AF" => "h" + +# ḣ [LATIN SMALL LETTER H WITH DOT ABOVE] +"\u1E23" => "h" + +# ḥ [LATIN SMALL LETTER H WITH DOT BELOW] +"\u1E25" => "h" + +# ḧ [LATIN SMALL LETTER H WITH DIAERESIS] +"\u1E27" => "h" + +# ḩ [LATIN SMALL LETTER H WITH CEDILLA] +"\u1E29" => "h" + +# ḫ [LATIN SMALL LETTER H WITH BREVE BELOW] +"\u1E2B" => "h" + +# ẖ [LATIN SMALL LETTER H WITH LINE BELOW] +"\u1E96" => "h" + +# ⓗ [CIRCLED LATIN SMALL LETTER H] +"\u24D7" => "h" + +# ⱨ [LATIN SMALL LETTER H WITH DESCENDER] +"\u2C68" => "h" + +# ⱶ [LATIN SMALL LETTER HALF H] +"\u2C76" => "h" + +# h [FULLWIDTH LATIN SMALL LETTER H] +"\uFF48" => "h" + +# Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR] +"\u01F6" => "HV" + +# ⒣ [PARENTHESIZED LATIN SMALL LETTER H] +"\u24A3" => "(h)" + +# ƕ [LATIN SMALL LETTER HV] +"\u0195" => "hv" + +# Ì [LATIN CAPITAL LETTER I WITH GRAVE] +"\u00CC" => "I" + +# Í [LATIN CAPITAL LETTER I WITH ACUTE] +"\u00CD" => "I" + +# Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] +"\u00CE" => "I" + +# Ï [LATIN CAPITAL LETTER I WITH DIAERESIS] +"\u00CF" => "I" + +# Ĩ [LATIN CAPITAL LETTER I WITH TILDE] +"\u0128" => "I" + +# Ī [LATIN CAPITAL LETTER I WITH 
MACRON] +"\u012A" => "I" + +# Ĭ [LATIN CAPITAL LETTER I WITH BREVE] +"\u012C" => "I" + +# Ä® [LATIN CAPITAL LETTER I WITH OGONEK] +"\u012E" => "I" + +# Ä° [LATIN CAPITAL LETTER I WITH DOT ABOVE] +"\u0130" => "I" + +# Ɩ [LATIN CAPITAL LETTER IOTA] +"\u0196" => "I" + +# Ɨ [LATIN CAPITAL LETTER I WITH STROKE] +"\u0197" => "I" + +# Ǐ [LATIN CAPITAL LETTER I WITH CARON] +"\u01CF" => "I" + +# Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] +"\u0208" => "I" + +# Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE] +"\u020A" => "I" + +# ɪ [LATIN LETTER SMALL CAPITAL I] +"\u026A" => "I" + +# áµ» [LATIN SMALL CAPITAL LETTER I WITH STROKE] +"\u1D7B" => "I" + +# Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW] +"\u1E2C" => "I" + +# Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] +"\u1E2E" => "I" + +# Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE] +"\u1EC8" => "I" + +# Ị [LATIN CAPITAL LETTER I WITH DOT BELOW] +"\u1ECA" => "I" + +# Ⓘ [CIRCLED LATIN CAPITAL LETTER I] +"\u24BE" => "I" + +# ꟾ [LATIN EPIGRAPHIC LETTER I LONGA] +"\uA7FE" => "I" + +# I [FULLWIDTH LATIN CAPITAL LETTER I] +"\uFF29" => "I" + +# ì [LATIN SMALL LETTER I WITH GRAVE] +"\u00EC" => "i" + +# í [LATIN SMALL LETTER I WITH ACUTE] +"\u00ED" => "i" + +# î [LATIN SMALL LETTER I WITH CIRCUMFLEX] +"\u00EE" => "i" + +# ï [LATIN SMALL LETTER I WITH DIAERESIS] +"\u00EF" => "i" + +# Ä© [LATIN SMALL LETTER I WITH TILDE] +"\u0129" => "i" + +# Ä« [LATIN SMALL LETTER I WITH MACRON] +"\u012B" => "i" + +# Ä­ [LATIN SMALL LETTER I WITH BREVE] +"\u012D" => "i" + +# į [LATIN SMALL LETTER I WITH OGONEK] +"\u012F" => "i" + +# ı [LATIN SMALL LETTER DOTLESS I] +"\u0131" => "i" + +# ǐ [LATIN SMALL LETTER I WITH CARON] +"\u01D0" => "i" + +# ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE] +"\u0209" => "i" + +# ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE] +"\u020B" => "i" + +# ɨ [LATIN SMALL LETTER I WITH STROKE] +"\u0268" => "i" + +# ᴉ [LATIN SMALL LETTER TURNED I] +"\u1D09" => "i" + +# áµ¢ [LATIN SUBSCRIPT SMALL LETTER I] +"\u1D62" => "i" + +# áµ¼ [LATIN 
SMALL LETTER IOTA WITH STROKE] +"\u1D7C" => "i" + +# ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK] +"\u1D96" => "i" + +# ḭ [LATIN SMALL LETTER I WITH TILDE BELOW] +"\u1E2D" => "i" + +# ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] +"\u1E2F" => "i" + +# ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE] +"\u1EC9" => "i" + +# ị [LATIN SMALL LETTER I WITH DOT BELOW] +"\u1ECB" => "i" + +# ⁱ [SUPERSCRIPT LATIN SMALL LETTER I] +"\u2071" => "i" + +# ⓘ [CIRCLED LATIN SMALL LETTER I] +"\u24D8" => "i" + +# i [FULLWIDTH LATIN SMALL LETTER I] +"\uFF49" => "i" + +# IJ [LATIN CAPITAL LIGATURE IJ] +"\u0132" => "IJ" + +# ⒤ [PARENTHESIZED LATIN SMALL LETTER I] +"\u24A4" => "(i)" + +# ij [LATIN SMALL LIGATURE IJ] +"\u0133" => "ij" + +# Ä´ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] +"\u0134" => "J" + +# Ɉ [LATIN CAPITAL LETTER J WITH STROKE] +"\u0248" => "J" + +# ᴊ [LATIN LETTER SMALL CAPITAL J] +"\u1D0A" => "J" + +# Ⓙ [CIRCLED LATIN CAPITAL LETTER J] +"\u24BF" => "J" + +# J [FULLWIDTH LATIN CAPITAL LETTER J] +"\uFF2A" => "J" + +# ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX] +"\u0135" => "j" + +# Ç° [LATIN SMALL LETTER J WITH CARON] +"\u01F0" => "j" + +# È· [LATIN SMALL LETTER DOTLESS J] +"\u0237" => "j" + +# ɉ [LATIN SMALL LETTER J WITH STROKE] +"\u0249" => "j" + +# ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE] +"\u025F" => "j" + +# ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] +"\u0284" => "j" + +# ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL] +"\u029D" => "j" + +# ⓙ [CIRCLED LATIN SMALL LETTER J] +"\u24D9" => "j" + +# â±¼ [LATIN SUBSCRIPT SMALL LETTER J] +"\u2C7C" => "j" + +# j [FULLWIDTH LATIN SMALL LETTER J] +"\uFF4A" => "j" + +# ⒥ [PARENTHESIZED LATIN SMALL LETTER J] +"\u24A5" => "(j)" + +# Ķ [LATIN CAPITAL LETTER K WITH CEDILLA] +"\u0136" => "K" + +# Ƙ [LATIN CAPITAL LETTER K WITH HOOK] +"\u0198" => "K" + +# Ǩ [LATIN CAPITAL LETTER K WITH CARON] +"\u01E8" => "K" + +# ᴋ [LATIN LETTER SMALL CAPITAL K] +"\u1D0B" => "K" + +# Ḱ [LATIN CAPITAL LETTER K WITH ACUTE] +"\u1E30" => "K" + +# Ḳ 
[LATIN CAPITAL LETTER K WITH DOT BELOW] +"\u1E32" => "K" + +# Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW] +"\u1E34" => "K" + +# Ⓚ [CIRCLED LATIN CAPITAL LETTER K] +"\u24C0" => "K" + +# Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER] +"\u2C69" => "K" + +# Ꝁ [LATIN CAPITAL LETTER K WITH STROKE] +"\uA740" => "K" + +# Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] +"\uA742" => "K" + +# Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] +"\uA744" => "K" + +# K [FULLWIDTH LATIN CAPITAL LETTER K] +"\uFF2B" => "K" + +# Ä· [LATIN SMALL LETTER K WITH CEDILLA] +"\u0137" => "k" + +# ƙ [LATIN SMALL LETTER K WITH HOOK] +"\u0199" => "k" + +# Ç© [LATIN SMALL LETTER K WITH CARON] +"\u01E9" => "k" + +# ʞ [LATIN SMALL LETTER TURNED K] +"\u029E" => "k" + +# ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK] +"\u1D84" => "k" + +# ḱ [LATIN SMALL LETTER K WITH ACUTE] +"\u1E31" => "k" + +# ḳ [LATIN SMALL LETTER K WITH DOT BELOW] +"\u1E33" => "k" + +# ḵ [LATIN SMALL LETTER K WITH LINE BELOW] +"\u1E35" => "k" + +# ⓚ [CIRCLED LATIN SMALL LETTER K] +"\u24DA" => "k" + +# ⱪ [LATIN SMALL LETTER K WITH DESCENDER] +"\u2C6A" => "k" + +# ꝁ [LATIN SMALL LETTER K WITH STROKE] +"\uA741" => "k" + +# ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE] +"\uA743" => "k" + +# ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] +"\uA745" => "k" + +# k [FULLWIDTH LATIN SMALL LETTER K] +"\uFF4B" => "k" + +# ⒦ [PARENTHESIZED LATIN SMALL LETTER K] +"\u24A6" => "(k)" + +# Ĺ [LATIN CAPITAL LETTER L WITH ACUTE] +"\u0139" => "L" + +# Ä» [LATIN CAPITAL LETTER L WITH CEDILLA] +"\u013B" => "L" + +# Ľ [LATIN CAPITAL LETTER L WITH CARON] +"\u013D" => "L" + +# Ä¿ [LATIN CAPITAL LETTER L WITH MIDDLE DOT] +"\u013F" => "L" + +# Ł [LATIN CAPITAL LETTER L WITH STROKE] +"\u0141" => "L" + +# Ƚ [LATIN CAPITAL LETTER L WITH BAR] +"\u023D" => "L" + +# ʟ [LATIN LETTER SMALL CAPITAL L] +"\u029F" => "L" + +# ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE] +"\u1D0C" => "L" + +# Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW] +"\u1E36" => "L" + +# Ḹ 
[LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] +"\u1E38" => "L" + +# Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW] +"\u1E3A" => "L" + +# Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] +"\u1E3C" => "L" + +# Ⓛ [CIRCLED LATIN CAPITAL LETTER L] +"\u24C1" => "L" + +# â±  [LATIN CAPITAL LETTER L WITH DOUBLE BAR] +"\u2C60" => "L" + +# â±¢ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] +"\u2C62" => "L" + +# Ꝇ [LATIN CAPITAL LETTER BROKEN L] +"\uA746" => "L" + +# Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE] +"\uA748" => "L" + +# Ꞁ [LATIN CAPITAL LETTER TURNED L] +"\uA780" => "L" + +# L [FULLWIDTH LATIN CAPITAL LETTER L] +"\uFF2C" => "L" + +# ĺ [LATIN SMALL LETTER L WITH ACUTE] +"\u013A" => "l" + +# ļ [LATIN SMALL LETTER L WITH CEDILLA] +"\u013C" => "l" + +# ľ [LATIN SMALL LETTER L WITH CARON] +"\u013E" => "l" + +# ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT] +"\u0140" => "l" + +# ł [LATIN SMALL LETTER L WITH STROKE] +"\u0142" => "l" + +# ƚ [LATIN SMALL LETTER L WITH BAR] +"\u019A" => "l" + +# È´ [LATIN SMALL LETTER L WITH CURL] +"\u0234" => "l" + +# É« [LATIN SMALL LETTER L WITH MIDDLE TILDE] +"\u026B" => "l" + +# ɬ [LATIN SMALL LETTER L WITH BELT] +"\u026C" => "l" + +# É­ [LATIN SMALL LETTER L WITH RETROFLEX HOOK] +"\u026D" => "l" + +# ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK] +"\u1D85" => "l" + +# ḷ [LATIN SMALL LETTER L WITH DOT BELOW] +"\u1E37" => "l" + +# ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] +"\u1E39" => "l" + +# ḻ [LATIN SMALL LETTER L WITH LINE BELOW] +"\u1E3B" => "l" + +# ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] +"\u1E3D" => "l" + +# ⓛ [CIRCLED LATIN SMALL LETTER L] +"\u24DB" => "l" + +# ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR] +"\u2C61" => "l" + +# ꝇ [LATIN SMALL LETTER BROKEN L] +"\uA747" => "l" + +# ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE] +"\uA749" => "l" + +# ꞁ [LATIN SMALL LETTER TURNED L] +"\uA781" => "l" + +# l [FULLWIDTH LATIN SMALL LETTER L] +"\uFF4C" => "l" + +# LJ [LATIN CAPITAL LETTER LJ] +"\u01C7" => "LJ" + +# Ỻ [LATIN CAPITAL LETTER 
MIDDLE-WELSH LL] +"\u1EFA" => "LL" + +# Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J] +"\u01C8" => "Lj" + +# ⒧ [PARENTHESIZED LATIN SMALL LETTER L] +"\u24A7" => "(l)" + +# lj [LATIN SMALL LETTER LJ] +"\u01C9" => "lj" + +# á»» [LATIN SMALL LETTER MIDDLE-WELSH LL] +"\u1EFB" => "ll" + +# ʪ [LATIN SMALL LETTER LS DIGRAPH] +"\u02AA" => "ls" + +# Ê« [LATIN SMALL LETTER LZ DIGRAPH] +"\u02AB" => "lz" + +# Ɯ [LATIN CAPITAL LETTER TURNED M] +"\u019C" => "M" + +# ᴍ [LATIN LETTER SMALL CAPITAL M] +"\u1D0D" => "M" + +# Ḿ [LATIN CAPITAL LETTER M WITH ACUTE] +"\u1E3E" => "M" + +# Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE] +"\u1E40" => "M" + +# Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW] +"\u1E42" => "M" + +# Ⓜ [CIRCLED LATIN CAPITAL LETTER M] +"\u24C2" => "M" + +# â±® [LATIN CAPITAL LETTER M WITH HOOK] +"\u2C6E" => "M" + +# ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M] +"\uA7FD" => "M" + +# ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M] +"\uA7FF" => "M" + +# ï¼­ [FULLWIDTH LATIN CAPITAL LETTER M] +"\uFF2D" => "M" + +# ɯ [LATIN SMALL LETTER TURNED M] +"\u026F" => "m" + +# É° [LATIN SMALL LETTER TURNED M WITH LONG LEG] +"\u0270" => "m" + +# ɱ [LATIN SMALL LETTER M WITH HOOK] +"\u0271" => "m" + +# ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE] +"\u1D6F" => "m" + +# ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK] +"\u1D86" => "m" + +# ḿ [LATIN SMALL LETTER M WITH ACUTE] +"\u1E3F" => "m" + +# ṁ [LATIN SMALL LETTER M WITH DOT ABOVE] +"\u1E41" => "m" + +# ṃ [LATIN SMALL LETTER M WITH DOT BELOW] +"\u1E43" => "m" + +# ⓜ [CIRCLED LATIN SMALL LETTER M] +"\u24DC" => "m" + +# m [FULLWIDTH LATIN SMALL LETTER M] +"\uFF4D" => "m" + +# ⒨ [PARENTHESIZED LATIN SMALL LETTER M] +"\u24A8" => "(m)" + +# Ñ [LATIN CAPITAL LETTER N WITH TILDE] +"\u00D1" => "N" + +# Ń [LATIN CAPITAL LETTER N WITH ACUTE] +"\u0143" => "N" + +# Ņ [LATIN CAPITAL LETTER N WITH CEDILLA] +"\u0145" => "N" + +# Ň [LATIN CAPITAL LETTER N WITH CARON] +"\u0147" => "N" + +# Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG] +"\u014A" => "N" 
+ +# Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK] +"\u019D" => "N" + +# Ǹ [LATIN CAPITAL LETTER N WITH GRAVE] +"\u01F8" => "N" + +# È  [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] +"\u0220" => "N" + +# É´ [LATIN LETTER SMALL CAPITAL N] +"\u0274" => "N" + +# ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N] +"\u1D0E" => "N" + +# Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE] +"\u1E44" => "N" + +# Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW] +"\u1E46" => "N" + +# Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW] +"\u1E48" => "N" + +# Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] +"\u1E4A" => "N" + +# Ⓝ [CIRCLED LATIN CAPITAL LETTER N] +"\u24C3" => "N" + +# ï¼® [FULLWIDTH LATIN CAPITAL LETTER N] +"\uFF2E" => "N" + +# ñ [LATIN SMALL LETTER N WITH TILDE] +"\u00F1" => "n" + +# ń [LATIN SMALL LETTER N WITH ACUTE] +"\u0144" => "n" + +# ņ [LATIN SMALL LETTER N WITH CEDILLA] +"\u0146" => "n" + +# ň [LATIN SMALL LETTER N WITH CARON] +"\u0148" => "n" + +# ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] +"\u0149" => "n" + +# ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG] +"\u014B" => "n" + +# ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG] +"\u019E" => "n" + +# ǹ [LATIN SMALL LETTER N WITH GRAVE] +"\u01F9" => "n" + +# ȵ [LATIN SMALL LETTER N WITH CURL] +"\u0235" => "n" + +# ɲ [LATIN SMALL LETTER N WITH LEFT HOOK] +"\u0272" => "n" + +# ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK] +"\u0273" => "n" + +# áµ° [LATIN SMALL LETTER N WITH MIDDLE TILDE] +"\u1D70" => "n" + +# ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK] +"\u1D87" => "n" + +# ṅ [LATIN SMALL LETTER N WITH DOT ABOVE] +"\u1E45" => "n" + +# ṇ [LATIN SMALL LETTER N WITH DOT BELOW] +"\u1E47" => "n" + +# ṉ [LATIN SMALL LETTER N WITH LINE BELOW] +"\u1E49" => "n" + +# ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] +"\u1E4B" => "n" + +# ⁿ [SUPERSCRIPT LATIN SMALL LETTER N] +"\u207F" => "n" + +# ⓝ [CIRCLED LATIN SMALL LETTER N] +"\u24DD" => "n" + +# n [FULLWIDTH LATIN SMALL LETTER N] +"\uFF4E" => "n" + +# NJ [LATIN CAPITAL LETTER NJ] 
+"\u01CA" => "NJ" + +# Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J] +"\u01CB" => "Nj" + +# ⒩ [PARENTHESIZED LATIN SMALL LETTER N] +"\u24A9" => "(n)" + +# nj [LATIN SMALL LETTER NJ] +"\u01CC" => "nj" + +# Ò [LATIN CAPITAL LETTER O WITH GRAVE] +"\u00D2" => "O" + +# Ó [LATIN CAPITAL LETTER O WITH ACUTE] +"\u00D3" => "O" + +# Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] +"\u00D4" => "O" + +# Õ [LATIN CAPITAL LETTER O WITH TILDE] +"\u00D5" => "O" + +# Ö [LATIN CAPITAL LETTER O WITH DIAERESIS] +"\u00D6" => "O" + +# Ø [LATIN CAPITAL LETTER O WITH STROKE] +"\u00D8" => "O" + +# Ō [LATIN CAPITAL LETTER O WITH MACRON] +"\u014C" => "O" + +# Ŏ [LATIN CAPITAL LETTER O WITH BREVE] +"\u014E" => "O" + +# Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] +"\u0150" => "O" + +# Ɔ [LATIN CAPITAL LETTER OPEN O] +"\u0186" => "O" + +# Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] +"\u019F" => "O" + +# Æ  [LATIN CAPITAL LETTER O WITH HORN] +"\u01A0" => "O" + +# Ǒ [LATIN CAPITAL LETTER O WITH CARON] +"\u01D1" => "O" + +# Ǫ [LATIN CAPITAL LETTER O WITH OGONEK] +"\u01EA" => "O" + +# Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] +"\u01EC" => "O" + +# Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] +"\u01FE" => "O" + +# Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] +"\u020C" => "O" + +# Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE] +"\u020E" => "O" + +# Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] +"\u022A" => "O" + +# Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] +"\u022C" => "O" + +# È® [LATIN CAPITAL LETTER O WITH DOT ABOVE] +"\u022E" => "O" + +# È° [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON] +"\u0230" => "O" + +# ᴏ [LATIN LETTER SMALL CAPITAL O] +"\u1D0F" => "O" + +# ᴐ [LATIN LETTER SMALL CAPITAL OPEN O] +"\u1D10" => "O" + +# Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] +"\u1E4C" => "O" + +# Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] +"\u1E4E" => "O" + +# Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] +"\u1E50" => "O" + +# Ṓ [LATIN CAPITAL LETTER O WITH 
MACRON AND ACUTE] +"\u1E52" => "O" + +# Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW] +"\u1ECC" => "O" + +# Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE] +"\u1ECE" => "O" + +# Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] +"\u1ED0" => "O" + +# Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] +"\u1ED2" => "O" + +# Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] +"\u1ED4" => "O" + +# Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] +"\u1ED6" => "O" + +# Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] +"\u1ED8" => "O" + +# Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] +"\u1EDA" => "O" + +# Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] +"\u1EDC" => "O" + +# Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] +"\u1EDE" => "O" + +# á»  [LATIN CAPITAL LETTER O WITH HORN AND TILDE] +"\u1EE0" => "O" + +# Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] +"\u1EE2" => "O" + +# Ⓞ [CIRCLED LATIN CAPITAL LETTER O] +"\u24C4" => "O" + +# Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] +"\uA74A" => "O" + +# Ꝍ [LATIN CAPITAL LETTER O WITH LOOP] +"\uA74C" => "O" + +# O [FULLWIDTH LATIN CAPITAL LETTER O] +"\uFF2F" => "O" + +# ò [LATIN SMALL LETTER O WITH GRAVE] +"\u00F2" => "o" + +# ó [LATIN SMALL LETTER O WITH ACUTE] +"\u00F3" => "o" + +# ô [LATIN SMALL LETTER O WITH CIRCUMFLEX] +"\u00F4" => "o" + +# õ [LATIN SMALL LETTER O WITH TILDE] +"\u00F5" => "o" + +# ö [LATIN SMALL LETTER O WITH DIAERESIS] +"\u00F6" => "o" + +# ø [LATIN SMALL LETTER O WITH STROKE] +"\u00F8" => "o" + +# ō [LATIN SMALL LETTER O WITH MACRON] +"\u014D" => "o" + +# ŏ [LATIN SMALL LETTER O WITH BREVE] +"\u014F" => "o" + +# ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE] +"\u0151" => "o" + +# Æ¡ [LATIN SMALL LETTER O WITH HORN] +"\u01A1" => "o" + +# ǒ [LATIN SMALL LETTER O WITH CARON] +"\u01D2" => "o" + +# Ç« [LATIN SMALL LETTER O WITH OGONEK] +"\u01EB" => "o" + +# Ç­ [LATIN SMALL LETTER O WITH OGONEK AND MACRON] +"\u01ED" => "o" + +# Ç¿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE] 
+"\u01FF" => "o" + +# ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE] +"\u020D" => "o" + +# ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE] +"\u020F" => "o" + +# È« [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] +"\u022B" => "o" + +# È­ [LATIN SMALL LETTER O WITH TILDE AND MACRON] +"\u022D" => "o" + +# ȯ [LATIN SMALL LETTER O WITH DOT ABOVE] +"\u022F" => "o" + +# ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] +"\u0231" => "o" + +# ɔ [LATIN SMALL LETTER OPEN O] +"\u0254" => "o" + +# ɵ [LATIN SMALL LETTER BARRED O] +"\u0275" => "o" + +# ᴖ [LATIN SMALL LETTER TOP HALF O] +"\u1D16" => "o" + +# ᴗ [LATIN SMALL LETTER BOTTOM HALF O] +"\u1D17" => "o" + +# ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] +"\u1D97" => "o" + +# ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE] +"\u1E4D" => "o" + +# ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] +"\u1E4F" => "o" + +# ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE] +"\u1E51" => "o" + +# ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE] +"\u1E53" => "o" + +# ọ [LATIN SMALL LETTER O WITH DOT BELOW] +"\u1ECD" => "o" + +# ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE] +"\u1ECF" => "o" + +# ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] +"\u1ED1" => "o" + +# ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] +"\u1ED3" => "o" + +# ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] +"\u1ED5" => "o" + +# ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] +"\u1ED7" => "o" + +# ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW] +"\u1ED9" => "o" + +# ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE] +"\u1EDB" => "o" + +# ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE] +"\u1EDD" => "o" + +# ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] +"\u1EDF" => "o" + +# ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE] +"\u1EE1" => "o" + +# ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] +"\u1EE3" => "o" + +# ₒ [LATIN SUBSCRIPT SMALL LETTER O] +"\u2092" => "o" + +# ⓞ [CIRCLED LATIN SMALL LETTER O] +"\u24DE" => "o" + +# ⱺ [LATIN SMALL LETTER O WITH LOW RING 
INSIDE] +"\u2C7A" => "o" + +# ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] +"\uA74B" => "o" + +# ꝍ [LATIN SMALL LETTER O WITH LOOP] +"\uA74D" => "o" + +# o [FULLWIDTH LATIN SMALL LETTER O] +"\uFF4F" => "o" + +# Œ [LATIN CAPITAL LIGATURE OE] +"\u0152" => "OE" + +# ɶ [LATIN LETTER SMALL CAPITAL OE] +"\u0276" => "OE" + +# Ꝏ [LATIN CAPITAL LETTER OO] +"\uA74E" => "OO" + +# È¢ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU] +"\u0222" => "OU" + +# ᴕ [LATIN LETTER SMALL CAPITAL OU] +"\u1D15" => "OU" + +# ⒪ [PARENTHESIZED LATIN SMALL LETTER O] +"\u24AA" => "(o)" + +# œ [LATIN SMALL LIGATURE OE] +"\u0153" => "oe" + +# ᴔ [LATIN SMALL LETTER TURNED OE] +"\u1D14" => "oe" + +# ꝏ [LATIN SMALL LETTER OO] +"\uA74F" => "oo" + +# È£ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU] +"\u0223" => "ou" + +# Ƥ [LATIN CAPITAL LETTER P WITH HOOK] +"\u01A4" => "P" + +# ᴘ [LATIN LETTER SMALL CAPITAL P] +"\u1D18" => "P" + +# Ṕ [LATIN CAPITAL LETTER P WITH ACUTE] +"\u1E54" => "P" + +# Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE] +"\u1E56" => "P" + +# Ⓟ [CIRCLED LATIN CAPITAL LETTER P] +"\u24C5" => "P" + +# â±£ [LATIN CAPITAL LETTER P WITH STROKE] +"\u2C63" => "P" + +# Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] +"\uA750" => "P" + +# Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH] +"\uA752" => "P" + +# Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] +"\uA754" => "P" + +# ï¼° [FULLWIDTH LATIN CAPITAL LETTER P] +"\uFF30" => "P" + +# Æ¥ [LATIN SMALL LETTER P WITH HOOK] +"\u01A5" => "p" + +# áµ± [LATIN SMALL LETTER P WITH MIDDLE TILDE] +"\u1D71" => "p" + +# áµ½ [LATIN SMALL LETTER P WITH STROKE] +"\u1D7D" => "p" + +# ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK] +"\u1D88" => "p" + +# ṕ [LATIN SMALL LETTER P WITH ACUTE] +"\u1E55" => "p" + +# ṗ [LATIN SMALL LETTER P WITH DOT ABOVE] +"\u1E57" => "p" + +# ⓟ [CIRCLED LATIN SMALL LETTER P] +"\u24DF" => "p" + +# ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] +"\uA751" => "p" + +# ꝓ [LATIN SMALL LETTER P WITH FLOURISH] +"\uA753" 
=> "p" + +# ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL] +"\uA755" => "p" + +# ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P] +"\uA7FC" => "p" + +# p [FULLWIDTH LATIN SMALL LETTER P] +"\uFF50" => "p" + +# ⒫ [PARENTHESIZED LATIN SMALL LETTER P] +"\u24AB" => "(p)" + +# Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] +"\u024A" => "Q" + +# Ⓠ [CIRCLED LATIN CAPITAL LETTER Q] +"\u24C6" => "Q" + +# Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] +"\uA756" => "Q" + +# Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] +"\uA758" => "Q" + +# ï¼± [FULLWIDTH LATIN CAPITAL LETTER Q] +"\uFF31" => "Q" + +# ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA] +"\u0138" => "q" + +# ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL] +"\u024B" => "q" + +# Ê  [LATIN SMALL LETTER Q WITH HOOK] +"\u02A0" => "q" + +# ⓠ [CIRCLED LATIN SMALL LETTER Q] +"\u24E0" => "q" + +# ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] +"\uA757" => "q" + +# ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] +"\uA759" => "q" + +# q [FULLWIDTH LATIN SMALL LETTER Q] +"\uFF51" => "q" + +# ⒬ [PARENTHESIZED LATIN SMALL LETTER Q] +"\u24AC" => "(q)" + +# ȹ [LATIN SMALL LETTER QP DIGRAPH] +"\u0239" => "qp" + +# Ŕ [LATIN CAPITAL LETTER R WITH ACUTE] +"\u0154" => "R" + +# Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA] +"\u0156" => "R" + +# Ř [LATIN CAPITAL LETTER R WITH CARON] +"\u0158" => "R" + +# Ȓ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] +"\u0210" => "R" + +# Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE] +"\u0212" => "R" + +# Ɍ [LATIN CAPITAL LETTER R WITH STROKE] +"\u024C" => "R" + +# ʀ [LATIN LETTER SMALL CAPITAL R] +"\u0280" => "R" + +# ʁ [LATIN LETTER SMALL CAPITAL INVERTED R] +"\u0281" => "R" + +# ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R] +"\u1D19" => "R" + +# ᴚ [LATIN LETTER SMALL CAPITAL TURNED R] +"\u1D1A" => "R" + +# Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE] +"\u1E58" => "R" + +# Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW] +"\u1E5A" => "R" + +# Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] +"\u1E5C" 
=> "R" + +# Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW] +"\u1E5E" => "R" + +# Ⓡ [CIRCLED LATIN CAPITAL LETTER R] +"\u24C7" => "R" + +# Ɽ [LATIN CAPITAL LETTER R WITH TAIL] +"\u2C64" => "R" + +# Ꝛ [LATIN CAPITAL LETTER R ROTUNDA] +"\uA75A" => "R" + +# Ꞃ [LATIN CAPITAL LETTER INSULAR R] +"\uA782" => "R" + +# ï¼² [FULLWIDTH LATIN CAPITAL LETTER R] +"\uFF32" => "R" + +# ŕ [LATIN SMALL LETTER R WITH ACUTE] +"\u0155" => "r" + +# ŗ [LATIN SMALL LETTER R WITH CEDILLA] +"\u0157" => "r" + +# ř [LATIN SMALL LETTER R WITH CARON] +"\u0159" => "r" + +# ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE] +"\u0211" => "r" + +# ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE] +"\u0213" => "r" + +# ɍ [LATIN SMALL LETTER R WITH STROKE] +"\u024D" => "r" + +# ɼ [LATIN SMALL LETTER R WITH LONG LEG] +"\u027C" => "r" + +# ɽ [LATIN SMALL LETTER R WITH TAIL] +"\u027D" => "r" + +# ɾ [LATIN SMALL LETTER R WITH FISHHOOK] +"\u027E" => "r" + +# É¿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] +"\u027F" => "r" + +# áµ£ [LATIN SUBSCRIPT SMALL LETTER R] +"\u1D63" => "r" + +# áµ² [LATIN SMALL LETTER R WITH MIDDLE TILDE] +"\u1D72" => "r" + +# áµ³ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] +"\u1D73" => "r" + +# ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK] +"\u1D89" => "r" + +# ṙ [LATIN SMALL LETTER R WITH DOT ABOVE] +"\u1E59" => "r" + +# ṛ [LATIN SMALL LETTER R WITH DOT BELOW] +"\u1E5B" => "r" + +# ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON] +"\u1E5D" => "r" + +# ṟ [LATIN SMALL LETTER R WITH LINE BELOW] +"\u1E5F" => "r" + +# ⓡ [CIRCLED LATIN SMALL LETTER R] +"\u24E1" => "r" + +# ꝛ [LATIN SMALL LETTER R ROTUNDA] +"\uA75B" => "r" + +# ꞃ [LATIN SMALL LETTER INSULAR R] +"\uA783" => "r" + +# r [FULLWIDTH LATIN SMALL LETTER R] +"\uFF52" => "r" + +# ⒭ [PARENTHESIZED LATIN SMALL LETTER R] +"\u24AD" => "(r)" + +# Ś [LATIN CAPITAL LETTER S WITH ACUTE] +"\u015A" => "S" + +# Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] +"\u015C" => "S" + +# Ş [LATIN CAPITAL LETTER S WITH CEDILLA] +"\u015E" => "S" + +# Å  [LATIN 
CAPITAL LETTER S WITH CARON] +"\u0160" => "S" + +# Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW] +"\u0218" => "S" + +# á¹  [LATIN CAPITAL LETTER S WITH DOT ABOVE] +"\u1E60" => "S" + +# á¹¢ [LATIN CAPITAL LETTER S WITH DOT BELOW] +"\u1E62" => "S" + +# Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] +"\u1E64" => "S" + +# Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] +"\u1E66" => "S" + +# Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] +"\u1E68" => "S" + +# Ⓢ [CIRCLED LATIN CAPITAL LETTER S] +"\u24C8" => "S" + +# ꜱ [LATIN LETTER SMALL CAPITAL S] +"\uA731" => "S" + +# ꞅ [LATIN SMALL LETTER INSULAR S] +"\uA785" => "S" + +# ï¼³ [FULLWIDTH LATIN CAPITAL LETTER S] +"\uFF33" => "S" + +# ś [LATIN SMALL LETTER S WITH ACUTE] +"\u015B" => "s" + +# ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX] +"\u015D" => "s" + +# ş [LATIN SMALL LETTER S WITH CEDILLA] +"\u015F" => "s" + +# Å¡ [LATIN SMALL LETTER S WITH CARON] +"\u0161" => "s" + +# Å¿ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S] +"\u017F" => "s" + +# ș [LATIN SMALL LETTER S WITH COMMA BELOW] +"\u0219" => "s" + +# È¿ [LATIN SMALL LETTER S WITH SWASH TAIL] +"\u023F" => "s" + +# ʂ [LATIN SMALL LETTER S WITH HOOK] +"\u0282" => "s" + +# áµ´ [LATIN SMALL LETTER S WITH MIDDLE TILDE] +"\u1D74" => "s" + +# ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK] +"\u1D8A" => "s" + +# ṡ [LATIN SMALL LETTER S WITH DOT ABOVE] +"\u1E61" => "s" + +# á¹£ [LATIN SMALL LETTER S WITH DOT BELOW] +"\u1E63" => "s" + +# á¹¥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] +"\u1E65" => "s" + +# ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] +"\u1E67" => "s" + +# ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] +"\u1E69" => "s" + +# ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] +"\u1E9C" => "s" + +# ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE] +"\u1E9D" => "s" + +# ⓢ [CIRCLED LATIN SMALL LETTER S] +"\u24E2" => "s" + +# Ꞅ [LATIN CAPITAL LETTER INSULAR S] +"\uA784" => "s" + +# s [FULLWIDTH LATIN SMALL LETTER S] +"\uFF53" => 
"s" + +# ẞ [LATIN CAPITAL LETTER SHARP S] +"\u1E9E" => "SS" + +# ⒮ [PARENTHESIZED LATIN SMALL LETTER S] +"\u24AE" => "(s)" + +# ß [LATIN SMALL LETTER SHARP S] +"\u00DF" => "ss" + +# st [LATIN SMALL LIGATURE ST] +"\uFB06" => "st" + +# Å¢ [LATIN CAPITAL LETTER T WITH CEDILLA] +"\u0162" => "T" + +# Ť [LATIN CAPITAL LETTER T WITH CARON] +"\u0164" => "T" + +# Ŧ [LATIN CAPITAL LETTER T WITH STROKE] +"\u0166" => "T" + +# Ƭ [LATIN CAPITAL LETTER T WITH HOOK] +"\u01AC" => "T" + +# Æ® [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] +"\u01AE" => "T" + +# Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW] +"\u021A" => "T" + +# Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] +"\u023E" => "T" + +# ᴛ [LATIN LETTER SMALL CAPITAL T] +"\u1D1B" => "T" + +# Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE] +"\u1E6A" => "T" + +# Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW] +"\u1E6C" => "T" + +# á¹® [LATIN CAPITAL LETTER T WITH LINE BELOW] +"\u1E6E" => "T" + +# á¹° [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] +"\u1E70" => "T" + +# Ⓣ [CIRCLED LATIN CAPITAL LETTER T] +"\u24C9" => "T" + +# Ꞇ [LATIN CAPITAL LETTER INSULAR T] +"\uA786" => "T" + +# ï¼´ [FULLWIDTH LATIN CAPITAL LETTER T] +"\uFF34" => "T" + +# Å£ [LATIN SMALL LETTER T WITH CEDILLA] +"\u0163" => "t" + +# Å¥ [LATIN SMALL LETTER T WITH CARON] +"\u0165" => "t" + +# ŧ [LATIN SMALL LETTER T WITH STROKE] +"\u0167" => "t" + +# Æ« [LATIN SMALL LETTER T WITH PALATAL HOOK] +"\u01AB" => "t" + +# Æ­ [LATIN SMALL LETTER T WITH HOOK] +"\u01AD" => "t" + +# ț [LATIN SMALL LETTER T WITH COMMA BELOW] +"\u021B" => "t" + +# ȶ [LATIN SMALL LETTER T WITH CURL] +"\u0236" => "t" + +# ʇ [LATIN SMALL LETTER TURNED T] +"\u0287" => "t" + +# ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK] +"\u0288" => "t" + +# áµµ [LATIN SMALL LETTER T WITH MIDDLE TILDE] +"\u1D75" => "t" + +# ṫ [LATIN SMALL LETTER T WITH DOT ABOVE] +"\u1E6B" => "t" + +# á¹­ [LATIN SMALL LETTER T WITH DOT BELOW] +"\u1E6D" => "t" + +# ṯ [LATIN SMALL LETTER T WITH LINE BELOW] +"\u1E6F" => "t" + +# á¹± [LATIN SMALL 
LETTER T WITH CIRCUMFLEX BELOW] +"\u1E71" => "t" + +# ẗ [LATIN SMALL LETTER T WITH DIAERESIS] +"\u1E97" => "t" + +# ⓣ [CIRCLED LATIN SMALL LETTER T] +"\u24E3" => "t" + +# ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE] +"\u2C66" => "t" + +# t [FULLWIDTH LATIN SMALL LETTER T] +"\uFF54" => "t" + +# Þ [LATIN CAPITAL LETTER THORN] +"\u00DE" => "TH" + +# Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] +"\uA766" => "TH" + +# Ꜩ [LATIN CAPITAL LETTER TZ] +"\uA728" => "TZ" + +# ⒯ [PARENTHESIZED LATIN SMALL LETTER T] +"\u24AF" => "(t)" + +# ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL] +"\u02A8" => "tc" + +# þ [LATIN SMALL LETTER THORN] +"\u00FE" => "th" + +# ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH] +"\u1D7A" => "th" + +# ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] +"\uA767" => "th" + +# ʦ [LATIN SMALL LETTER TS DIGRAPH] +"\u02A6" => "ts" + +# ꜩ [LATIN SMALL LETTER TZ] +"\uA729" => "tz" + +# Ù [LATIN CAPITAL LETTER U WITH GRAVE] +"\u00D9" => "U" + +# Ú [LATIN CAPITAL LETTER U WITH ACUTE] +"\u00DA" => "U" + +# Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] +"\u00DB" => "U" + +# Ü [LATIN CAPITAL LETTER U WITH DIAERESIS] +"\u00DC" => "U" + +# Ũ [LATIN CAPITAL LETTER U WITH TILDE] +"\u0168" => "U" + +# Ū [LATIN CAPITAL LETTER U WITH MACRON] +"\u016A" => "U" + +# Ŭ [LATIN CAPITAL LETTER U WITH BREVE] +"\u016C" => "U" + +# Å® [LATIN CAPITAL LETTER U WITH RING ABOVE] +"\u016E" => "U" + +# Å° [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] +"\u0170" => "U" + +# Ų [LATIN CAPITAL LETTER U WITH OGONEK] +"\u0172" => "U" + +# Ư [LATIN CAPITAL LETTER U WITH HORN] +"\u01AF" => "U" + +# Ǔ [LATIN CAPITAL LETTER U WITH CARON] +"\u01D3" => "U" + +# Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON] +"\u01D5" => "U" + +# Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] +"\u01D7" => "U" + +# Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] +"\u01D9" => "U" + +# Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] +"\u01DB" => "U" + +# Ȕ [LATIN CAPITAL LETTER U WITH 
DOUBLE GRAVE] +"\u0214" => "U" + +# Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE] +"\u0216" => "U" + +# Ʉ [LATIN CAPITAL LETTER U BAR] +"\u0244" => "U" + +# ᴜ [LATIN LETTER SMALL CAPITAL U] +"\u1D1C" => "U" + +# áµ¾ [LATIN SMALL CAPITAL LETTER U WITH STROKE] +"\u1D7E" => "U" + +# á¹² [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] +"\u1E72" => "U" + +# á¹´ [LATIN CAPITAL LETTER U WITH TILDE BELOW] +"\u1E74" => "U" + +# Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] +"\u1E76" => "U" + +# Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] +"\u1E78" => "U" + +# Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] +"\u1E7A" => "U" + +# Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW] +"\u1EE4" => "U" + +# Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE] +"\u1EE6" => "U" + +# Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] +"\u1EE8" => "U" + +# Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] +"\u1EEA" => "U" + +# Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] +"\u1EEC" => "U" + +# á»® [LATIN CAPITAL LETTER U WITH HORN AND TILDE] +"\u1EEE" => "U" + +# á»° [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] +"\u1EF0" => "U" + +# Ⓤ [CIRCLED LATIN CAPITAL LETTER U] +"\u24CA" => "U" + +# ï¼µ [FULLWIDTH LATIN CAPITAL LETTER U] +"\uFF35" => "U" + +# ù [LATIN SMALL LETTER U WITH GRAVE] +"\u00F9" => "u" + +# ú [LATIN SMALL LETTER U WITH ACUTE] +"\u00FA" => "u" + +# û [LATIN SMALL LETTER U WITH CIRCUMFLEX] +"\u00FB" => "u" + +# ü [LATIN SMALL LETTER U WITH DIAERESIS] +"\u00FC" => "u" + +# Å© [LATIN SMALL LETTER U WITH TILDE] +"\u0169" => "u" + +# Å« [LATIN SMALL LETTER U WITH MACRON] +"\u016B" => "u" + +# Å­ [LATIN SMALL LETTER U WITH BREVE] +"\u016D" => "u" + +# ů [LATIN SMALL LETTER U WITH RING ABOVE] +"\u016F" => "u" + +# ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE] +"\u0171" => "u" + +# ų [LATIN SMALL LETTER U WITH OGONEK] +"\u0173" => "u" + +# Æ° [LATIN SMALL LETTER U WITH HORN] +"\u01B0" => "u" + +# ǔ [LATIN SMALL LETTER U WITH CARON] +"\u01D4" => "u" + +# ǖ [LATIN SMALL LETTER U WITH 
DIAERESIS AND MACRON] +"\u01D6" => "u" + +# ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] +"\u01D8" => "u" + +# ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] +"\u01DA" => "u" + +# ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] +"\u01DC" => "u" + +# ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE] +"\u0215" => "u" + +# ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE] +"\u0217" => "u" + +# ʉ [LATIN SMALL LETTER U BAR] +"\u0289" => "u" + +# ᵤ [LATIN SUBSCRIPT SMALL LETTER U] +"\u1D64" => "u" + +# ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK] +"\u1D99" => "u" + +# á¹³ [LATIN SMALL LETTER U WITH DIAERESIS BELOW] +"\u1E73" => "u" + +# á¹µ [LATIN SMALL LETTER U WITH TILDE BELOW] +"\u1E75" => "u" + +# á¹· [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] +"\u1E77" => "u" + +# á¹¹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE] +"\u1E79" => "u" + +# á¹» [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] +"\u1E7B" => "u" + +# ụ [LATIN SMALL LETTER U WITH DOT BELOW] +"\u1EE5" => "u" + +# ủ [LATIN SMALL LETTER U WITH HOOK ABOVE] +"\u1EE7" => "u" + +# ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE] +"\u1EE9" => "u" + +# ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE] +"\u1EEB" => "u" + +# á»­ [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] +"\u1EED" => "u" + +# ữ [LATIN SMALL LETTER U WITH HORN AND TILDE] +"\u1EEF" => "u" + +# á»± [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] +"\u1EF1" => "u" + +# ⓤ [CIRCLED LATIN SMALL LETTER U] +"\u24E4" => "u" + +# u [FULLWIDTH LATIN SMALL LETTER U] +"\uFF55" => "u" + +# ⒰ [PARENTHESIZED LATIN SMALL LETTER U] +"\u24B0" => "(u)" + +# ᵫ [LATIN SMALL LETTER UE] +"\u1D6B" => "ue" + +# Ʋ [LATIN CAPITAL LETTER V WITH HOOK] +"\u01B2" => "V" + +# Ʌ [LATIN CAPITAL LETTER TURNED V] +"\u0245" => "V" + +# á´  [LATIN LETTER SMALL CAPITAL V] +"\u1D20" => "V" + +# á¹¼ [LATIN CAPITAL LETTER V WITH TILDE] +"\u1E7C" => "V" + +# á¹¾ [LATIN CAPITAL LETTER V WITH DOT BELOW] +"\u1E7E" => "V" + +# Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V] +"\u1EFC" => "V" + +# Ⓥ [CIRCLED LATIN 
CAPITAL LETTER V] +"\u24CB" => "V" + +# Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] +"\uA75E" => "V" + +# Ꝩ [LATIN CAPITAL LETTER VEND] +"\uA768" => "V" + +# V [FULLWIDTH LATIN CAPITAL LETTER V] +"\uFF36" => "V" + +# ʋ [LATIN SMALL LETTER V WITH HOOK] +"\u028B" => "v" + +# ʌ [LATIN SMALL LETTER TURNED V] +"\u028C" => "v" + +# áµ¥ [LATIN SUBSCRIPT SMALL LETTER V] +"\u1D65" => "v" + +# ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK] +"\u1D8C" => "v" + +# á¹½ [LATIN SMALL LETTER V WITH TILDE] +"\u1E7D" => "v" + +# ṿ [LATIN SMALL LETTER V WITH DOT BELOW] +"\u1E7F" => "v" + +# ⓥ [CIRCLED LATIN SMALL LETTER V] +"\u24E5" => "v" + +# â±± [LATIN SMALL LETTER V WITH RIGHT HOOK] +"\u2C71" => "v" + +# â±´ [LATIN SMALL LETTER V WITH CURL] +"\u2C74" => "v" + +# ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE] +"\uA75F" => "v" + +# v [FULLWIDTH LATIN SMALL LETTER V] +"\uFF56" => "v" + +# Ꝡ [LATIN CAPITAL LETTER VY] +"\uA760" => "VY" + +# ⒱ [PARENTHESIZED LATIN SMALL LETTER V] +"\u24B1" => "(v)" + +# ꝡ [LATIN SMALL LETTER VY] +"\uA761" => "vy" + +# Å´ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] +"\u0174" => "W" + +# Ç· http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN] +"\u01F7" => "W" + +# á´¡ [LATIN LETTER SMALL CAPITAL W] +"\u1D21" => "W" + +# Ẁ [LATIN CAPITAL LETTER W WITH GRAVE] +"\u1E80" => "W" + +# Ẃ [LATIN CAPITAL LETTER W WITH ACUTE] +"\u1E82" => "W" + +# Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS] +"\u1E84" => "W" + +# Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE] +"\u1E86" => "W" + +# Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW] +"\u1E88" => "W" + +# Ⓦ [CIRCLED LATIN CAPITAL LETTER W] +"\u24CC" => "W" + +# â±² [LATIN CAPITAL LETTER W WITH HOOK] +"\u2C72" => "W" + +# ï¼· [FULLWIDTH LATIN CAPITAL LETTER W] +"\uFF37" => "W" + +# ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX] +"\u0175" => "w" + +# Æ¿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN] +"\u01BF" => "w" + +# ʍ [LATIN SMALL LETTER TURNED W] +"\u028D" => "w" + +# ẁ [LATIN SMALL LETTER W WITH GRAVE] +"\u1E81" => "w" + +# 
ẃ [LATIN SMALL LETTER W WITH ACUTE] +"\u1E83" => "w" + +# ẅ [LATIN SMALL LETTER W WITH DIAERESIS] +"\u1E85" => "w" + +# ẇ [LATIN SMALL LETTER W WITH DOT ABOVE] +"\u1E87" => "w" + +# ẉ [LATIN SMALL LETTER W WITH DOT BELOW] +"\u1E89" => "w" + +# ẘ [LATIN SMALL LETTER W WITH RING ABOVE] +"\u1E98" => "w" + +# ⓦ [CIRCLED LATIN SMALL LETTER W] +"\u24E6" => "w" + +# â±³ [LATIN SMALL LETTER W WITH HOOK] +"\u2C73" => "w" + +# w [FULLWIDTH LATIN SMALL LETTER W] +"\uFF57" => "w" + +# ⒲ [PARENTHESIZED LATIN SMALL LETTER W] +"\u24B2" => "(w)" + +# Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE] +"\u1E8A" => "X" + +# Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS] +"\u1E8C" => "X" + +# Ⓧ [CIRCLED LATIN CAPITAL LETTER X] +"\u24CD" => "X" + +# X [FULLWIDTH LATIN CAPITAL LETTER X] +"\uFF38" => "X" + +# ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK] +"\u1D8D" => "x" + +# ẋ [LATIN SMALL LETTER X WITH DOT ABOVE] +"\u1E8B" => "x" + +# ẍ [LATIN SMALL LETTER X WITH DIAERESIS] +"\u1E8D" => "x" + +# ₓ [LATIN SUBSCRIPT SMALL LETTER X] +"\u2093" => "x" + +# ⓧ [CIRCLED LATIN SMALL LETTER X] +"\u24E7" => "x" + +# x [FULLWIDTH LATIN SMALL LETTER X] +"\uFF58" => "x" + +# ⒳ [PARENTHESIZED LATIN SMALL LETTER X] +"\u24B3" => "(x)" + +# Ý [LATIN CAPITAL LETTER Y WITH ACUTE] +"\u00DD" => "Y" + +# Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] +"\u0176" => "Y" + +# Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS] +"\u0178" => "Y" + +# Ƴ [LATIN CAPITAL LETTER Y WITH HOOK] +"\u01B3" => "Y" + +# Ȳ [LATIN CAPITAL LETTER Y WITH MACRON] +"\u0232" => "Y" + +# Ɏ [LATIN CAPITAL LETTER Y WITH STROKE] +"\u024E" => "Y" + +# ʏ [LATIN LETTER SMALL CAPITAL Y] +"\u028F" => "Y" + +# Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE] +"\u1E8E" => "Y" + +# Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE] +"\u1EF2" => "Y" + +# á»´ [LATIN CAPITAL LETTER Y WITH DOT BELOW] +"\u1EF4" => "Y" + +# Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE] +"\u1EF6" => "Y" + +# Ỹ [LATIN CAPITAL LETTER Y WITH TILDE] +"\u1EF8" => "Y" + +# Ỿ [LATIN CAPITAL LETTER Y WITH LOOP] +"\u1EFE" => "Y" 
+ +# Ⓨ [CIRCLED LATIN CAPITAL LETTER Y] +"\u24CE" => "Y" + +# ï¼¹ [FULLWIDTH LATIN CAPITAL LETTER Y] +"\uFF39" => "Y" + +# ý [LATIN SMALL LETTER Y WITH ACUTE] +"\u00FD" => "y" + +# ÿ [LATIN SMALL LETTER Y WITH DIAERESIS] +"\u00FF" => "y" + +# Å· [LATIN SMALL LETTER Y WITH CIRCUMFLEX] +"\u0177" => "y" + +# Æ´ [LATIN SMALL LETTER Y WITH HOOK] +"\u01B4" => "y" + +# ȳ [LATIN SMALL LETTER Y WITH MACRON] +"\u0233" => "y" + +# ɏ [LATIN SMALL LETTER Y WITH STROKE] +"\u024F" => "y" + +# ʎ [LATIN SMALL LETTER TURNED Y] +"\u028E" => "y" + +# ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE] +"\u1E8F" => "y" + +# ẙ [LATIN SMALL LETTER Y WITH RING ABOVE] +"\u1E99" => "y" + +# ỳ [LATIN SMALL LETTER Y WITH GRAVE] +"\u1EF3" => "y" + +# ỵ [LATIN SMALL LETTER Y WITH DOT BELOW] +"\u1EF5" => "y" + +# á»· [LATIN SMALL LETTER Y WITH HOOK ABOVE] +"\u1EF7" => "y" + +# ỹ [LATIN SMALL LETTER Y WITH TILDE] +"\u1EF9" => "y" + +# ỿ [LATIN SMALL LETTER Y WITH LOOP] +"\u1EFF" => "y" + +# ⓨ [CIRCLED LATIN SMALL LETTER Y] +"\u24E8" => "y" + +# y [FULLWIDTH LATIN SMALL LETTER Y] +"\uFF59" => "y" + +# ⒴ [PARENTHESIZED LATIN SMALL LETTER Y] +"\u24B4" => "(y)" + +# Ź [LATIN CAPITAL LETTER Z WITH ACUTE] +"\u0179" => "Z" + +# Å» [LATIN CAPITAL LETTER Z WITH DOT ABOVE] +"\u017B" => "Z" + +# Ž [LATIN CAPITAL LETTER Z WITH CARON] +"\u017D" => "Z" + +# Ƶ [LATIN CAPITAL LETTER Z WITH STROKE] +"\u01B5" => "Z" + +# Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH] +"\u021C" => "Z" + +# Ȥ [LATIN CAPITAL LETTER Z WITH HOOK] +"\u0224" => "Z" + +# á´¢ [LATIN LETTER SMALL CAPITAL Z] +"\u1D22" => "Z" + +# Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] +"\u1E90" => "Z" + +# Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW] +"\u1E92" => "Z" + +# Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW] +"\u1E94" => "Z" + +# Ⓩ [CIRCLED LATIN CAPITAL LETTER Z] +"\u24CF" => "Z" + +# Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER] +"\u2C6B" => "Z" + +# Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z] +"\uA762" => "Z" + +# Z [FULLWIDTH LATIN CAPITAL LETTER Z] 
+"\uFF3A" => "Z" + +# ź [LATIN SMALL LETTER Z WITH ACUTE] +"\u017A" => "z" + +# ż [LATIN SMALL LETTER Z WITH DOT ABOVE] +"\u017C" => "z" + +# ž [LATIN SMALL LETTER Z WITH CARON] +"\u017E" => "z" + +# ƶ [LATIN SMALL LETTER Z WITH STROKE] +"\u01B6" => "z" + +# ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH] +"\u021D" => "z" + +# È¥ [LATIN SMALL LETTER Z WITH HOOK] +"\u0225" => "z" + +# ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL] +"\u0240" => "z" + +# ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] +"\u0290" => "z" + +# ʑ [LATIN SMALL LETTER Z WITH CURL] +"\u0291" => "z" + +# ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE] +"\u1D76" => "z" + +# ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK] +"\u1D8E" => "z" + +# ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX] +"\u1E91" => "z" + +# ẓ [LATIN SMALL LETTER Z WITH DOT BELOW] +"\u1E93" => "z" + +# ẕ [LATIN SMALL LETTER Z WITH LINE BELOW] +"\u1E95" => "z" + +# ⓩ [CIRCLED LATIN SMALL LETTER Z] +"\u24E9" => "z" + +# ⱬ [LATIN SMALL LETTER Z WITH DESCENDER] +"\u2C6C" => "z" + +# ꝣ [LATIN SMALL LETTER VISIGOTHIC Z] +"\uA763" => "z" + +# z [FULLWIDTH LATIN SMALL LETTER Z] +"\uFF5A" => "z" + +# ⒵ [PARENTHESIZED LATIN SMALL LETTER Z] +"\u24B5" => "(z)" + +# ⁰ [SUPERSCRIPT ZERO] +"\u2070" => "0" + +# ₀ [SUBSCRIPT ZERO] +"\u2080" => "0" + +# ⓪ [CIRCLED DIGIT ZERO] +"\u24EA" => "0" + +# ⓿ [NEGATIVE CIRCLED DIGIT ZERO] +"\u24FF" => "0" + +# 0 [FULLWIDTH DIGIT ZERO] +"\uFF10" => "0" + +# ¹ [SUPERSCRIPT ONE] +"\u00B9" => "1" + +# ₁ [SUBSCRIPT ONE] +"\u2081" => "1" + +# ① [CIRCLED DIGIT ONE] +"\u2460" => "1" + +# ⓵ [DOUBLE CIRCLED DIGIT ONE] +"\u24F5" => "1" + +# ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE] +"\u2776" => "1" + +# ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] +"\u2780" => "1" + +# ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] +"\u278A" => "1" + +# 1 [FULLWIDTH DIGIT ONE] +"\uFF11" => "1" + +# ⒈ [DIGIT ONE FULL STOP] +"\u2488" => "1." 
+ +# ⑴ [PARENTHESIZED DIGIT ONE] +"\u2474" => "(1)" + +# ² [SUPERSCRIPT TWO] +"\u00B2" => "2" + +# ₂ [SUBSCRIPT TWO] +"\u2082" => "2" + +# ② [CIRCLED DIGIT TWO] +"\u2461" => "2" + +# ⓶ [DOUBLE CIRCLED DIGIT TWO] +"\u24F6" => "2" + +# ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO] +"\u2777" => "2" + +# ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] +"\u2781" => "2" + +# ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] +"\u278B" => "2" + +# 2 [FULLWIDTH DIGIT TWO] +"\uFF12" => "2" + +# ⒉ [DIGIT TWO FULL STOP] +"\u2489" => "2." + +# ⑵ [PARENTHESIZED DIGIT TWO] +"\u2475" => "(2)" + +# ³ [SUPERSCRIPT THREE] +"\u00B3" => "3" + +# ₃ [SUBSCRIPT THREE] +"\u2083" => "3" + +# ③ [CIRCLED DIGIT THREE] +"\u2462" => "3" + +# ⓷ [DOUBLE CIRCLED DIGIT THREE] +"\u24F7" => "3" + +# ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE] +"\u2778" => "3" + +# ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] +"\u2782" => "3" + +# ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] +"\u278C" => "3" + +# 3 [FULLWIDTH DIGIT THREE] +"\uFF13" => "3" + +# ⒊ [DIGIT THREE FULL STOP] +"\u248A" => "3." + +# ⑶ [PARENTHESIZED DIGIT THREE] +"\u2476" => "(3)" + +# ⁴ [SUPERSCRIPT FOUR] +"\u2074" => "4" + +# ₄ [SUBSCRIPT FOUR] +"\u2084" => "4" + +# ④ [CIRCLED DIGIT FOUR] +"\u2463" => "4" + +# ⓸ [DOUBLE CIRCLED DIGIT FOUR] +"\u24F8" => "4" + +# ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] +"\u2779" => "4" + +# ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] +"\u2783" => "4" + +# ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] +"\u278D" => "4" + +# 4 [FULLWIDTH DIGIT FOUR] +"\uFF14" => "4" + +# ⒋ [DIGIT FOUR FULL STOP] +"\u248B" => "4." 
+ +# ⑷ [PARENTHESIZED DIGIT FOUR] +"\u2477" => "(4)" + +# ⁵ [SUPERSCRIPT FIVE] +"\u2075" => "5" + +# ₅ [SUBSCRIPT FIVE] +"\u2085" => "5" + +# ⑤ [CIRCLED DIGIT FIVE] +"\u2464" => "5" + +# ⓹ [DOUBLE CIRCLED DIGIT FIVE] +"\u24F9" => "5" + +# ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] +"\u277A" => "5" + +# ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] +"\u2784" => "5" + +# ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] +"\u278E" => "5" + +# 5 [FULLWIDTH DIGIT FIVE] +"\uFF15" => "5" + +# ⒌ [DIGIT FIVE FULL STOP] +"\u248C" => "5." + +# ⑸ [PARENTHESIZED DIGIT FIVE] +"\u2478" => "(5)" + +# ⁶ [SUPERSCRIPT SIX] +"\u2076" => "6" + +# ₆ [SUBSCRIPT SIX] +"\u2086" => "6" + +# ⑥ [CIRCLED DIGIT SIX] +"\u2465" => "6" + +# ⓺ [DOUBLE CIRCLED DIGIT SIX] +"\u24FA" => "6" + +# ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX] +"\u277B" => "6" + +# ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] +"\u2785" => "6" + +# ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] +"\u278F" => "6" + +# 6 [FULLWIDTH DIGIT SIX] +"\uFF16" => "6" + +# ⒍ [DIGIT SIX FULL STOP] +"\u248D" => "6." + +# ⑹ [PARENTHESIZED DIGIT SIX] +"\u2479" => "(6)" + +# ⁷ [SUPERSCRIPT SEVEN] +"\u2077" => "7" + +# ₇ [SUBSCRIPT SEVEN] +"\u2087" => "7" + +# ⑦ [CIRCLED DIGIT SEVEN] +"\u2466" => "7" + +# ⓻ [DOUBLE CIRCLED DIGIT SEVEN] +"\u24FB" => "7" + +# ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] +"\u277C" => "7" + +# ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] +"\u2786" => "7" + +# ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] +"\u2790" => "7" + +# 7 [FULLWIDTH DIGIT SEVEN] +"\uFF17" => "7" + +# ⒎ [DIGIT SEVEN FULL STOP] +"\u248E" => "7." 
+ +# ⑺ [PARENTHESIZED DIGIT SEVEN] +"\u247A" => "(7)" + +# ⁸ [SUPERSCRIPT EIGHT] +"\u2078" => "8" + +# ₈ [SUBSCRIPT EIGHT] +"\u2088" => "8" + +# ⑧ [CIRCLED DIGIT EIGHT] +"\u2467" => "8" + +# ⓼ [DOUBLE CIRCLED DIGIT EIGHT] +"\u24FC" => "8" + +# ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] +"\u277D" => "8" + +# ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] +"\u2787" => "8" + +# ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] +"\u2791" => "8" + +# 8 [FULLWIDTH DIGIT EIGHT] +"\uFF18" => "8" + +# ⒏ [DIGIT EIGHT FULL STOP] +"\u248F" => "8." + +# ⑻ [PARENTHESIZED DIGIT EIGHT] +"\u247B" => "(8)" + +# ⁹ [SUPERSCRIPT NINE] +"\u2079" => "9" + +# ₉ [SUBSCRIPT NINE] +"\u2089" => "9" + +# ⑨ [CIRCLED DIGIT NINE] +"\u2468" => "9" + +# ⓽ [DOUBLE CIRCLED DIGIT NINE] +"\u24FD" => "9" + +# ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE] +"\u277E" => "9" + +# ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] +"\u2788" => "9" + +# ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] +"\u2792" => "9" + +# 9 [FULLWIDTH DIGIT NINE] +"\uFF19" => "9" + +# ⒐ [DIGIT NINE FULL STOP] +"\u2490" => "9." + +# ⑼ [PARENTHESIZED DIGIT NINE] +"\u247C" => "(9)" + +# ⑩ [CIRCLED NUMBER TEN] +"\u2469" => "10" + +# ⓾ [DOUBLE CIRCLED NUMBER TEN] +"\u24FE" => "10" + +# ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN] +"\u277F" => "10" + +# ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] +"\u2789" => "10" + +# ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] +"\u2793" => "10" + +# ⒑ [NUMBER TEN FULL STOP] +"\u2491" => "10." + +# ⑽ [PARENTHESIZED NUMBER TEN] +"\u247D" => "(10)" + +# ⑪ [CIRCLED NUMBER ELEVEN] +"\u246A" => "11" + +# ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN] +"\u24EB" => "11" + +# ⒒ [NUMBER ELEVEN FULL STOP] +"\u2492" => "11." + +# ⑾ [PARENTHESIZED NUMBER ELEVEN] +"\u247E" => "(11)" + +# ⑫ [CIRCLED NUMBER TWELVE] +"\u246B" => "12" + +# ⓬ [NEGATIVE CIRCLED NUMBER TWELVE] +"\u24EC" => "12" + +# ⒓ [NUMBER TWELVE FULL STOP] +"\u2493" => "12." 
+ +# ⑿ [PARENTHESIZED NUMBER TWELVE] +"\u247F" => "(12)" + +# ⑬ [CIRCLED NUMBER THIRTEEN] +"\u246C" => "13" + +# ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN] +"\u24ED" => "13" + +# ⒔ [NUMBER THIRTEEN FULL STOP] +"\u2494" => "13." + +# ⒀ [PARENTHESIZED NUMBER THIRTEEN] +"\u2480" => "(13)" + +# ⑭ [CIRCLED NUMBER FOURTEEN] +"\u246D" => "14" + +# ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN] +"\u24EE" => "14" + +# ⒕ [NUMBER FOURTEEN FULL STOP] +"\u2495" => "14." + +# ⒁ [PARENTHESIZED NUMBER FOURTEEN] +"\u2481" => "(14)" + +# ⑮ [CIRCLED NUMBER FIFTEEN] +"\u246E" => "15" + +# ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN] +"\u24EF" => "15" + +# ⒖ [NUMBER FIFTEEN FULL STOP] +"\u2496" => "15." + +# ⒂ [PARENTHESIZED NUMBER FIFTEEN] +"\u2482" => "(15)" + +# ⑯ [CIRCLED NUMBER SIXTEEN] +"\u246F" => "16" + +# ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN] +"\u24F0" => "16" + +# ⒗ [NUMBER SIXTEEN FULL STOP] +"\u2497" => "16." + +# ⒃ [PARENTHESIZED NUMBER SIXTEEN] +"\u2483" => "(16)" + +# ⑰ [CIRCLED NUMBER SEVENTEEN] +"\u2470" => "17" + +# ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN] +"\u24F1" => "17" + +# ⒘ [NUMBER SEVENTEEN FULL STOP] +"\u2498" => "17." + +# ⒄ [PARENTHESIZED NUMBER SEVENTEEN] +"\u2484" => "(17)" + +# ⑱ [CIRCLED NUMBER EIGHTEEN] +"\u2471" => "18" + +# ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN] +"\u24F2" => "18" + +# ⒙ [NUMBER EIGHTEEN FULL STOP] +"\u2499" => "18." + +# ⒅ [PARENTHESIZED NUMBER EIGHTEEN] +"\u2485" => "(18)" + +# ⑲ [CIRCLED NUMBER NINETEEN] +"\u2472" => "19" + +# ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN] +"\u24F3" => "19" + +# ⒚ [NUMBER NINETEEN FULL STOP] +"\u249A" => "19." + +# ⒆ [PARENTHESIZED NUMBER NINETEEN] +"\u2486" => "(19)" + +# ⑳ [CIRCLED NUMBER TWENTY] +"\u2473" => "20" + +# ⓴ [NEGATIVE CIRCLED NUMBER TWENTY] +"\u24F4" => "20" + +# ⒛ [NUMBER TWENTY FULL STOP] +"\u249B" => "20." 
+ +# ⒇ [PARENTHESIZED NUMBER TWENTY] +"\u2487" => "(20)" + +# « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] +"\u00AB" => "\"" + +# » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] +"\u00BB" => "\"" + +# “ [LEFT DOUBLE QUOTATION MARK] +"\u201C" => "\"" + +# ” [RIGHT DOUBLE QUOTATION MARK] +"\u201D" => "\"" + +# „ [DOUBLE LOW-9 QUOTATION MARK] +"\u201E" => "\"" + +# ″ [DOUBLE PRIME] +"\u2033" => "\"" + +# ‶ [REVERSED DOUBLE PRIME] +"\u2036" => "\"" + +# ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] +"\u275D" => "\"" + +# ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] +"\u275E" => "\"" + +# ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] +"\u276E" => "\"" + +# ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] +"\u276F" => "\"" + +# " [FULLWIDTH QUOTATION MARK] +"\uFF02" => "\"" + +# ‘ [LEFT SINGLE QUOTATION MARK] +"\u2018" => "\'" + +# ’ [RIGHT SINGLE QUOTATION MARK] +"\u2019" => "\'" + +# ‚ [SINGLE LOW-9 QUOTATION MARK] +"\u201A" => "\'" + +# ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK] +"\u201B" => "\'" + +# ′ [PRIME] +"\u2032" => "\'" + +# ‵ [REVERSED PRIME] +"\u2035" => "\'" + +# ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] +"\u2039" => "\'" + +# › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] +"\u203A" => "\'" + +# ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] +"\u275B" => "\'" + +# ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] +"\u275C" => "\'" + +# ' [FULLWIDTH APOSTROPHE] +"\uFF07" => "\'" + +# ‐ [HYPHEN] +"\u2010" => "-" + +# ‑ [NON-BREAKING HYPHEN] +"\u2011" => "-" + +# ‒ [FIGURE DASH] +"\u2012" => "-" + +# – [EN DASH] +"\u2013" => "-" + +# — [EM DASH] +"\u2014" => "-" + +# ⁻ [SUPERSCRIPT MINUS] +"\u207B" => "-" + +# ₋ [SUBSCRIPT MINUS] +"\u208B" => "-" + +# - [FULLWIDTH HYPHEN-MINUS] +"\uFF0D" => "-" + +# ⁅ [LEFT SQUARE BRACKET WITH QUILL] +"\u2045" => "[" + +# ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] +"\u2772" => "[" + +# ï¼» [FULLWIDTH LEFT SQUARE BRACKET] +"\uFF3B" => "[" + +# ⁆ [RIGHT SQUARE BRACKET WITH QUILL] +"\u2046" => 
"]" + +# ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] +"\u2773" => "]" + +# ï¼½ [FULLWIDTH RIGHT SQUARE BRACKET] +"\uFF3D" => "]" + +# ⁽ [SUPERSCRIPT LEFT PARENTHESIS] +"\u207D" => "(" + +# ₍ [SUBSCRIPT LEFT PARENTHESIS] +"\u208D" => "(" + +# ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT] +"\u2768" => "(" + +# ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] +"\u276A" => "(" + +# ( [FULLWIDTH LEFT PARENTHESIS] +"\uFF08" => "(" + +# ⸨ [LEFT DOUBLE PARENTHESIS] +"\u2E28" => "((" + +# ⁾ [SUPERSCRIPT RIGHT PARENTHESIS] +"\u207E" => ")" + +# ₎ [SUBSCRIPT RIGHT PARENTHESIS] +"\u208E" => ")" + +# ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT] +"\u2769" => ")" + +# ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] +"\u276B" => ")" + +# ) [FULLWIDTH RIGHT PARENTHESIS] +"\uFF09" => ")" + +# ⸩ [RIGHT DOUBLE PARENTHESIS] +"\u2E29" => "))" + +# ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] +"\u276C" => "<" + +# ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] +"\u2770" => "<" + +# < [FULLWIDTH LESS-THAN SIGN] +"\uFF1C" => "<" + +# ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] +"\u276D" => ">" + +# ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] +"\u2771" => ">" + +# > [FULLWIDTH GREATER-THAN SIGN] +"\uFF1E" => ">" + +# ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT] +"\u2774" => "{" + +# { [FULLWIDTH LEFT CURLY BRACKET] +"\uFF5B" => "{" + +# ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT] +"\u2775" => "}" + +# } [FULLWIDTH RIGHT CURLY BRACKET] +"\uFF5D" => "}" + +# ⁺ [SUPERSCRIPT PLUS SIGN] +"\u207A" => "+" + +# ₊ [SUBSCRIPT PLUS SIGN] +"\u208A" => "+" + +# + [FULLWIDTH PLUS SIGN] +"\uFF0B" => "+" + +# ⁼ [SUPERSCRIPT EQUALS SIGN] +"\u207C" => "=" + +# ₌ [SUBSCRIPT EQUALS SIGN] +"\u208C" => "=" + +# = [FULLWIDTH EQUALS SIGN] +"\uFF1D" => "=" + +# ! [FULLWIDTH EXCLAMATION MARK] +"\uFF01" => "!" + +# ‼ [DOUBLE EXCLAMATION MARK] +"\u203C" => "!!" + +# ⁉ [EXCLAMATION QUESTION MARK] +"\u2049" => "!?" 
+ +# # [FULLWIDTH NUMBER SIGN] +"\uFF03" => "#" + +# $ [FULLWIDTH DOLLAR SIGN] +"\uFF04" => "$" + +# ⁒ [COMMERCIAL MINUS SIGN] +"\u2052" => "%" + +# % [FULLWIDTH PERCENT SIGN] +"\uFF05" => "%" + +# & [FULLWIDTH AMPERSAND] +"\uFF06" => "&" + +# ⁎ [LOW ASTERISK] +"\u204E" => "*" + +# * [FULLWIDTH ASTERISK] +"\uFF0A" => "*" + +# , [FULLWIDTH COMMA] +"\uFF0C" => "," + +# . [FULLWIDTH FULL STOP] +"\uFF0E" => "." + +# ⁄ [FRACTION SLASH] +"\u2044" => "/" + +# / [FULLWIDTH SOLIDUS] +"\uFF0F" => "/" + +# : [FULLWIDTH COLON] +"\uFF1A" => ":" + +# ⁏ [REVERSED SEMICOLON] +"\u204F" => ";" + +# ; [FULLWIDTH SEMICOLON] +"\uFF1B" => ";" + +# ? [FULLWIDTH QUESTION MARK] +"\uFF1F" => "?" + +# ⁇ [DOUBLE QUESTION MARK] +"\u2047" => "??" + +# ⁈ [QUESTION EXCLAMATION MARK] +"\u2048" => "?!" + +# ï¼  [FULLWIDTH COMMERCIAL AT] +"\uFF20" => "@" + +# ï¼¼ [FULLWIDTH REVERSE SOLIDUS] +"\uFF3C" => "\\" + +# ‸ [CARET] +"\u2038" => "^" + +# ï¼¾ [FULLWIDTH CIRCUMFLEX ACCENT] +"\uFF3E" => "^" + +# _ [FULLWIDTH LOW LINE] +"\uFF3F" => "_" + +# ⁓ [SWUNG DASH] +"\u2053" => "~" + +# ~ [FULLWIDTH TILDE] +"\uFF5E" => "~" + +################################################################ +# Below is the Perl script used to generate the above mappings # +# from ASCIIFoldingFilter.java: # +################################################################ +# +# #!/usr/bin/perl +# +# use warnings; +# use strict; +# +# my @source_chars = (); +# my @source_char_descriptions = (); +# my $target = ''; +# +# while (<>) { +# if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) { +# push @source_chars, $1; +# push @source_char_descriptions, $2; +# next; +# } +# if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) { +# $target .= $1; +# next; +# } +# if (/break;/) { +# $target = "\\\"" if ($target eq '"'); +# for my $source_char_num (0..$#source_chars) { +# print "# $source_char_descriptions[$source_char_num]\n"; +# print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n"; +# } +# @source_chars = (); +# 
@source_char_descriptions = (); +# $target = ''; +# } +# } diff --git a/zookeeper/solr/collection1/conf/mapping-ISOLatin1Accent.txt b/zookeeper/solr/collection1/conf/mapping-ISOLatin1Accent.txt new file mode 100644 index 0000000..ede7742 --- /dev/null +++ b/zookeeper/solr/collection1/conf/mapping-ISOLatin1Accent.txt @@ -0,0 +1,246 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Syntax: +# "source" => "target" +# "source".length() > 0 (source cannot be empty.) +# "target".length() >= 0 (target can be empty.) 
+ +# example: +# "À" => "A" +# "\u00C0" => "A" +# "\u00C0" => "\u0041" +# "ß" => "ss" +# "\t" => " " +# "\n" => "" + +# À => A +"\u00C0" => "A" + +# Á => A +"\u00C1" => "A" + +#  => A +"\u00C2" => "A" + +# à => A +"\u00C3" => "A" + +# Ä => A +"\u00C4" => "A" + +# Å => A +"\u00C5" => "A" + +# Æ => AE +"\u00C6" => "AE" + +# Ç => C +"\u00C7" => "C" + +# È => E +"\u00C8" => "E" + +# É => E +"\u00C9" => "E" + +# Ê => E +"\u00CA" => "E" + +# Ë => E +"\u00CB" => "E" + +# Ì => I +"\u00CC" => "I" + +# Í => I +"\u00CD" => "I" + +# Î => I +"\u00CE" => "I" + +# Ï => I +"\u00CF" => "I" + +# IJ => IJ +"\u0132" => "IJ" + +# Ð => D +"\u00D0" => "D" + +# Ñ => N +"\u00D1" => "N" + +# Ò => O +"\u00D2" => "O" + +# Ó => O +"\u00D3" => "O" + +# Ô => O +"\u00D4" => "O" + +# Õ => O +"\u00D5" => "O" + +# Ö => O +"\u00D6" => "O" + +# Ø => O +"\u00D8" => "O" + +# Œ => OE +"\u0152" => "OE" + +# Þ +"\u00DE" => "TH" + +# Ù => U +"\u00D9" => "U" + +# Ú => U +"\u00DA" => "U" + +# Û => U +"\u00DB" => "U" + +# Ü => U +"\u00DC" => "U" + +# Ý => Y +"\u00DD" => "Y" + +# Ÿ => Y +"\u0178" => "Y" + +# à => a +"\u00E0" => "a" + +# á => a +"\u00E1" => "a" + +# â => a +"\u00E2" => "a" + +# ã => a +"\u00E3" => "a" + +# ä => a +"\u00E4" => "a" + +# Ã¥ => a +"\u00E5" => "a" + +# æ => ae +"\u00E6" => "ae" + +# ç => c +"\u00E7" => "c" + +# è => e +"\u00E8" => "e" + +# é => e +"\u00E9" => "e" + +# ê => e +"\u00EA" => "e" + +# ë => e +"\u00EB" => "e" + +# ì => i +"\u00EC" => "i" + +# í => i +"\u00ED" => "i" + +# î => i +"\u00EE" => "i" + +# ï => i +"\u00EF" => "i" + +# ij => ij +"\u0133" => "ij" + +# ð => d +"\u00F0" => "d" + +# ñ => n +"\u00F1" => "n" + +# ò => o +"\u00F2" => "o" + +# ó => o +"\u00F3" => "o" + +# ô => o +"\u00F4" => "o" + +# õ => o +"\u00F5" => "o" + +# ö => o +"\u00F6" => "o" + +# ø => o +"\u00F8" => "o" + +# œ => oe +"\u0153" => "oe" + +# ß => ss +"\u00DF" => "ss" + +# þ => th +"\u00FE" => "th" + +# ù => u +"\u00F9" => "u" + +# ú => u +"\u00FA" => "u" + +# û => u +"\u00FB" => "u" + +# ü => u 
+"\u00FC" => "u" + +# ý => y +"\u00FD" => "y" + +# ÿ => y +"\u00FF" => "y" + +# ff => ff +"\uFB00" => "ff" + +# fi => fi +"\uFB01" => "fi" + +# fl => fl +"\uFB02" => "fl" + +# ffi => ffi +"\uFB03" => "ffi" + +# ffl => ffl +"\uFB04" => "ffl" + +# ſt => ft +"\uFB05" => "ft" + +# st => st +"\uFB06" => "st" diff --git a/zookeeper/solr/collection1/conf/protwords.txt b/zookeeper/solr/collection1/conf/protwords.txt new file mode 100644 index 0000000..1dfc0ab --- /dev/null +++ b/zookeeper/solr/collection1/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. 
+dontstems +zwhacky + diff --git a/zookeeper/solr/collection1/conf/schema.xml b/zookeeper/solr/collection1/conf/schema.xml new file mode 100644 index 0000000..27b6dff --- /dev/null +++ b/zookeeper/solr/collection1/conf/schema.xml @@ -0,0 +1,666 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + text + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/zookeeper/solr/collection1/conf/scripts.conf b/zookeeper/solr/collection1/conf/scripts.conf new file mode 100644 index 0000000..f58b262 --- /dev/null +++ b/zookeeper/solr/collection1/conf/scripts.conf @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +user= +solr_hostname=localhost +solr_port=8983 +rsyncd_port=18983 +data_dir= +webapp_name=solr +master_host= +master_data_dir= +master_status_dir= diff --git a/zookeeper/solr/collection1/conf/solrconfig.xml b/zookeeper/solr/collection1/conf/solrconfig.xml new file mode 100644 index 0000000..156314d --- /dev/null +++ b/zookeeper/solr/collection1/conf/solrconfig.xml @@ -0,0 +1,1823 @@ + + + + + + + + + 4.4 + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.lock.type:native} + + + + + + + + + + + + + + + + + + + + + + true + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:15000} + false + + + + + + ${solr.autoSoftCommit.maxTime:-1} + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 2 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + 
</b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + true + ignored_ + + + true + links + ignored_ + + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + text_general + + + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? 
+ + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/zookeeper/solr/collection1/conf/spellings.txt b/zookeeper/solr/collection1/conf/spellings.txt new file mode 100644 index 0000000..d7ede6f --- /dev/null +++ b/zookeeper/solr/collection1/conf/spellings.txt @@ -0,0 +1,2 @@ +pizza +history \ No newline at end of file diff --git a/zookeeper/solr/collection1/conf/stopwords.txt b/zookeeper/solr/collection1/conf/stopwords.txt new file mode 100644 index 0000000..ae1e83e --- /dev/null +++ b/zookeeper/solr/collection1/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/zookeeper/solr/collection1/conf/synonyms.txt b/zookeeper/solr/collection1/conf/synonyms.txt new file mode 100644 index 0000000..7f72128 --- /dev/null +++ b/zookeeper/solr/collection1/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/zookeeper/solr/collection1/conf/update-script.js b/zookeeper/solr/collection1/conf/update-script.js new file mode 100644 index 0000000..49b07f9 --- /dev/null +++ b/zookeeper/solr/collection1/conf/update-script.js @@ -0,0 +1,53 @@ +/* + This is a basic skeleton JavaScript update processor. + + In order for this to be executed, it must be properly wired into solrconfig.xml; by default it is commented out in + the example solrconfig.xml and must be uncommented to be enabled. + + See http://wiki.apache.org/solr/ScriptUpdateProcessor for more details. 
+*/ + +function processAdd(cmd) { + + doc = cmd.solrDoc; // org.apache.solr.common.SolrInputDocument + id = doc.getFieldValue("id"); + logger.info("update-script#processAdd: id=" + id); + +// Set a field value: +// doc.setField("foo_s", "whatever"); + +// Get a configuration parameter: +// config_param = params.get('config_param'); // "params" only exists if processor configured with <lst name="params"> + +// Get a request parameter: +// some_param = req.getParams().get("some_param") + +// Add a field of field names that match a pattern: +// - Potentially useful to determine the fields/attributes represented in a result set, via faceting on field_name_ss +// field_names = doc.getFieldNames().toArray(); +// for(i=0; i < field_names.length; i++) { +// field_name = field_names[i]; +// if (/attr_.*/.test(field_name)) { doc.addField("attribute_ss", field_names[i]); } +// } + +} + +function processDelete(cmd) { + // no-op +} + +function processMergeIndexes(cmd) { + // no-op +} + +function processCommit(cmd) { + // no-op +} + +function processRollback(cmd) { + // no-op +} + +function finish() { + // no-op +} diff --git a/zookeeper/solr/collection1/conf/velocity/README.txt b/zookeeper/solr/collection1/conf/velocity/README.txt new file mode 100644 index 0000000..5d560ba --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/README.txt @@ -0,0 +1,101 @@ +Introduction +------------ +Solr Search Velocity Templates + +A quick demo of Solr using http://wiki.apache.org/solr/VelocityResponseWriter + +You typically access these templates via: + http://localhost:8983/solr/collection1/browse + +It's called "browse" because you can click around with your mouse +without needing to type any search terms. And of course it +also works as a standard search app. + +Known Limitations +----------------- +* The /browse and the VelocityResponseWriter component + serve content directly from Solr, which usually requires + Solr's HTTP API to be exposed.
Advanced users could + potentially access other parts of Solr directly. +* There are some hard-coded fields in these templates. + Since these templates live under conf, they should be + considered part of the overall configuration, and + must be coordinated with schema.xml and solrconfig.xml + +Velocity Info +------------- +Java-based template language. + +It's nice in this context because changes to the templates +are immediately visible in the browser on the next visit. + +Links: + http://velocity.apache.org + http://wiki.apache.org/velocity/ + http://velocity.apache.org/engine/releases/velocity-1.7/user-guide.html + + +File List +--------- + +System and Misc: + VM_global_library.vm - Macros used by other templates, + exact filename is important for Velocity to see it + error.vm - shows errors, if any + debug.vm - includes toggle links for "explain" and "all fields" + activated by debug link in footer.vm + README.txt - this file + +Overall Page Composition: + browse.vm - Main entry point into templates + layout.vm - overall HTML page layout + head.vm - elements in the <head> section of the HTML document + header.vm - top section of page visible to users + footer.vm - bottom section of page visible to users, + includes debug and help links + main.css - CSS style for overall pages + see also jquery.autocomplete.css + +Query Form and Options: + query_form.vm - renders query form + query_group.vm - group by fields + e.g.: Manufacturer or Popularity + query_spatial.vm - select box for location-based Geospatial search + +Spelling Suggestions: + did_you_mean.vm - hyperlinked spelling suggestions in results + suggest.vm - dynamic spelling suggestions + as you type in the search form + jquery.autocomplete.js - supporting files for dynamic suggestions + jquery.autocomplete.css - Most CSS is defined in main.css + + +Search Results, General: + (see also browse.vm) + tabs.vm - provides navigation to advanced search options + pagination_top.vm - paging and statistics at top of results + 
pagination_bottom.vm - paging and statistics at bottom of results + results_list.vm + hit.vm - called for each matching doc, + decides which template to use + hit_grouped.vm - display results grouped by field values + product_doc.vm - display a Product + join_doc.vm - display a joined document + richtext_doc.vm - display a complex/misc. document + hit_plain.vm - basic display of all fields, + edit results_list.vm to enable this + + +Search Results, Facets & Clusters: + facets.vm - calls the 4 facet and 1 cluster templates + facet_fields.vm - display facets based on field values + e.g.: fields specified by &facet.field= + facet_queries.vm - display facets based on specific facet queries + e.g.: facets specified by &facet.query= + facet_ranges.vm - display facets based on ranges + e.g.: ranges specified by &facet.range= + facet_pivot.vm - display pivot based facets + e.g.: facets specified by &facet.pivot= + cluster.vm - if clustering is available + then call cluster_results.vm + cluster_results.vm - actual rendering of clusters diff --git a/zookeeper/solr/collection1/conf/velocity/VM_global_library.vm b/zookeeper/solr/collection1/conf/velocity/VM_global_library.vm new file mode 100644 index 0000000..5dda07c --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/VM_global_library.vm @@ -0,0 +1,175 @@ +#** + * Global macros used by other templates. + * This file must be named VM_global_library.vm + * in order for Velocity to find it. 
+ *# + +#macro(param $key)$request.params.get($key)#end + +#macro(url_root)/solr#end + +## TODO: s/url_for_solr/url_for_core/ and s/url_root/url_for_solr/ +#macro(core_name)$request.core.name#end +#macro(url_for_solr)#{url_root}#if($request.core.name != "")/$request.core.name#end#end +#macro(url_for_home)#url_for_solr/browse#end + +#macro(q)&q=$!{esc.url($params.get('q'))}#end + +#macro(fqs $p)#foreach($fq in $p)#if($velocityCount>1)&#{end}fq=$esc.url($fq)#end#end + +#macro(debug)#if($request.params.get('debugQuery'))&debugQuery=true#end#end + +#macro(boostPrice)#if($request.params.get('bf') == 'price')&bf=price#end#end + +#macro(annotate)#if($request.params.get('annotateBrowse'))&annotateBrowse=true#end#end + +#macro(annTitle $msg)#if($annotate == true)title="$msg"#end#end + +#macro(spatial)#if($request.params.get('sfield'))&sfield=store#end#if($request.params.get('pt'))&pt=$request.params.get('pt')#end#if($request.params.get('d'))&d=$request.params.get('d')#end#end + +#macro(qOpts)#set($queryOpts = $request.params.get("queryOpts"))#if($queryOpts && $queryOpts != "")&queryOpts=$queryOpts#end#end + +#macro(group)#if($request.params.getBool("group") == true)&group=true#end#if($request.params.get("group.field"))#foreach($grp in $request.params.getParams('group.field'))&group.field=$grp#end#end#end + +#macro(sort $p)#if($p)#foreach($s in $p)&sort=$esc.url($s)#end#end#end + +#macro(lensNoQ)?#if($request.params.getParams('fq') and $list.size($request.params.getParams('fq')) > 0)&#fqs($request.params.getParams('fq'))#end#sort($request.params.getParams('sort'))#debug#boostPrice#annotate#spatial#qOpts#group#end +#macro(lens)#lensNoQ#q#end + + +#macro(url_for_lens)#{url_for_home}#lens#end + +#macro(url_for_start $start)#url_for_home#lens&start=$start#end + +#macro(url_for_filters $p)#url_for_home?#q#boostPrice#spatial#qOpts#if($list.size($p) > 0)&#fqs($p)#end#debug#end + +#macro(url_for_nested_facet_query $field)#url_for_home#lens&fq=$esc.url($field)#end + +## TODO: convert 
to use {!raw f=$field}$value (with escaping of course) +#macro(url_for_facet_filter $field $value)#url_for_home#lens&fq=$esc.url($field):%22$esc.url($value)%22#end + +#macro(url_for_facet_date_filter $field $value)#url_for_home#lens&fq=$esc.url($field):$esc.url($value)#end + +#macro(url_for_facet_range_filter $field $value)#url_for_home#lens&fq=$esc.url($field):$esc.url($value)#end + + +#macro(link_to_previous_page $text) + #if($page.current_page_number > 1) + #set($prev_start = $page.start - $page.results_per_page) + $text + #end +#end + +#macro(link_to_next_page $text) + #if($page.current_page_number < $page.page_count) + #set($next_start = $page.start + $page.results_per_page) + $text + #end +#end + +#macro(link_to_page $page_number $text) + #if($page_number == $page.current_page_number) + $text + #else + #if($page_number <= $page.page_count) + #set($page_start = $page_number * $page.results_per_page - $page.results_per_page) + $text + #end + #end +#end + +#macro(display_facet_query $field, $display, $fieldName) + #if($field.size() > 0) + $display +
    + #foreach ($facet in $field) + #if ($facet.value > 0) + #set($facetURL = "#url_for_nested_facet_query($facet.key)") + #if ($facetURL != '') +
  • $facet.key ($facet.value)
  • + #end + #end + #end +
+ #end +#end + + +#macro(display_facet_range $field, $display, $fieldName, $start, $end, $gap, $before, $after) + $display +
    + #if($before && $before != "") + #set($value = "[* TO " + "#format_value($start)" + "}") + #set($facetURL = "#url_for_facet_range_filter($fieldName, $value)") +
  • Less than #format_value($start) ($before)
  • + #end + #foreach ($facet in $field) + #set($rangeEnd = "#range_get_to_value($facet.key, $gap)") + #set($value = "[" + $facet.key + " TO " + $rangeEnd + "}") + #set($facetURL = "#url_for_facet_range_filter($fieldName, $value)") + #if ($facetURL != '') +
  • $facet.key - #format_value($rangeEnd) ($facet.value)
  • + #end + #end + #if($end && $end != "" && $after > 0) + #set($value = "[" + "#format_value($end)" + " TO *}") + #set($facetURL = "#url_for_facet_range_filter($fieldName, $value)") +
  • More than #format_value($end) ($after)
  • + #end +
+#end + +## $pivots is a list of facet_pivot +#macro(display_facet_pivot $pivots, $display) + #if($pivots.size() > 0) + $display + + #end +#end + +#macro(field $f) + #if($response.response.highlighting.get($docId).get($f).get(0)) + #set($pad = "") + #foreach($v in $response.response.highlighting.get($docId).get($f)) +$pad$v## + #set($pad = " ... ") + #end + #else + #foreach($v in $doc.getFieldValues($f)) +$v## + #end + #end +#end + +#macro(utc_date $theDate) +$date.format("yyyy-MM-dd'T'HH:mm:ss'Z'",$theDate,$date.getLocale(),$date.getTimeZone().getTimeZone("UTC"))## +#end + +#macro(format_value $val) +#if(${val.class.name} == "java.util.Date") +#utc_date($val)## +#else +$val## +#end +#end + +#macro(range_get_to_value $inval, $gapval) +#if(${gapval.class.name} == "java.lang.String") +$inval$gapval## +#elseif(${gapval.class.name} == "java.lang.Float" || ${inval.class.name} == "java.lang.Float") +$math.toDouble($math.add($inval,$gapval))## +#else +$math.add($inval,$gapval)## +#end +#end diff --git a/zookeeper/solr/collection1/conf/velocity/browse.vm b/zookeeper/solr/collection1/conf/velocity/browse.vm new file mode 100644 index 0000000..10ecaeb --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/browse.vm @@ -0,0 +1,33 @@ +#** + * Main entry point into the /browse templates + *# + +#set($searcher = $request.searcher) +#set($params = $request.params) +#set($clusters = $response.response.clusters) +#set($mltResults = $response.response.get("moreLikeThis")) +#set($annotate = $params.get("annotateBrowse")) +#parse('query_form.vm') +#parse('did_you_mean.vm') + + + + + +## Show Error Message, if any +
+ #parse("error.vm") +
+ +## Render Results, actual matching docs +
+ #parse("results_list.vm") +
+ + diff --git a/zookeeper/solr/collection1/conf/velocity/cluster.vm b/zookeeper/solr/collection1/conf/velocity/cluster.vm new file mode 100644 index 0000000..4957071 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/cluster.vm @@ -0,0 +1,19 @@ +#** + * Check if Clustering is Enabled and then + * call cluster_results.vm + *# + +

+ Clusters +

+ +## Div tag has placeholder text by default +
+ Run Solr with java -Dsolr.clustering.enabled=true -jar start.jar to see results +
+ +## Replace the div content *if* Carrot^2 is available + diff --git a/zookeeper/solr/collection1/conf/velocity/cluster_results.vm b/zookeeper/solr/collection1/conf/velocity/cluster_results.vm new file mode 100644 index 0000000..204480d --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/cluster_results.vm @@ -0,0 +1,31 @@ +#** + * Actual rendering of Clusters + *# + +## For each cluster +#foreach ($clusters in $response.response.clusters) + + #set($labels = $clusters.get('labels')) + #set($docs = $clusters.get('docs')) + + ## This Cluster's Heading +

+ #foreach ($label in $labels) + ## Keep the following line together to prevent + ## a space appearing before each comma + $label#if( $foreach.hasNext ),#end + #end +

+ + ## This Cluster's Documents +
    + ## For each doc in this cluster + #foreach ($cluDoc in $docs) +
  1. + + $cluDoc +
  2. + #end +
+ +#end ## end for each Cluster diff --git a/zookeeper/solr/collection1/conf/velocity/debug.vm b/zookeeper/solr/collection1/conf/velocity/debug.vm new file mode 100644 index 0000000..8f6d232 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/debug.vm @@ -0,0 +1,28 @@ +#** + * Show Debugging Information, if enabled + *# + +#if( $params.getBool("debugQuery",false) ) + + toggle explain + +
+    $response.getExplainMap().get($doc.getFirstValue('id'))
+  
+ + + toggle all fields + + + #foreach($fieldname in $doc.fieldNames) +
+ $fieldname : + + #foreach($value in $doc.getFieldValues($fieldname)) + $esc.html($value) + #end + +
+ #end +
+#end diff --git a/zookeeper/solr/collection1/conf/velocity/did_you_mean.vm b/zookeeper/solr/collection1/conf/velocity/did_you_mean.vm new file mode 100644 index 0000000..b8faaa5 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/did_you_mean.vm @@ -0,0 +1,9 @@ +#** + * Hyperlinked spelling suggestions in results list + *# + +#set($dym = $response.response.spellcheck.suggestions.collation.collationQuery) +#if($dym) + Did you mean + $esc.html($dym)? +#end diff --git a/zookeeper/solr/collection1/conf/velocity/error.vm b/zookeeper/solr/collection1/conf/velocity/error.vm new file mode 100644 index 0000000..80b5819 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/error.vm @@ -0,0 +1,11 @@ +#** + * Show Error Message, if any + *# + +## Show Error Message, if any +## Usually rendered inside div class=error + +#if( $response.response.error.code ) +

ERROR $response.response.error.code

+ $response.response.error.msg +#end diff --git a/zookeeper/solr/collection1/conf/velocity/facet_fields.vm b/zookeeper/solr/collection1/conf/velocity/facet_fields.vm new file mode 100644 index 0000000..d9db659 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/facet_fields.vm @@ -0,0 +1,23 @@ +#** + * Display facets based on field values + * e.g.: fields specified by &facet.field= + *# + +#if($response.facetFields) +

+ Field Facets +

+ #foreach($field in $response.facetFields) + ## Hide facets without value + #if($field.values.size() > 0) + $field.name +
    + #foreach($facet in $field.values) +
  • + $facet.name ($facet.count) +
  • + #end +
+ #end ## end if > 0 + #end ## end for each facet field +#end ## end if response has facet fields diff --git a/zookeeper/solr/collection1/conf/velocity/facet_pivot.vm b/zookeeper/solr/collection1/conf/velocity/facet_pivot.vm new file mode 100644 index 0000000..7aa50da --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/facet_pivot.vm @@ -0,0 +1,12 @@ +#** + * Display Pivot-Based Facets + * e.g.: facets specified by &facet.pivot= + *# + +

+ Pivot Facets +

+ +#set($pivot = $response.response.facet_counts.facet_pivot) + +#display_facet_pivot($pivot, "") diff --git a/zookeeper/solr/collection1/conf/velocity/facet_queries.vm b/zookeeper/solr/collection1/conf/velocity/facet_queries.vm new file mode 100644 index 0000000..37489c7 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/facet_queries.vm @@ -0,0 +1,12 @@ +#** + * Display facets based on specific facet queries + * e.g.: facets specified by &facet.query= + *# + +#set($field = $response.response.facet_counts.facet_queries) + +

+ Query Facets +

+ +#display_facet_query($field, "", "") diff --git a/zookeeper/solr/collection1/conf/velocity/facet_ranges.vm b/zookeeper/solr/collection1/conf/velocity/facet_ranges.vm new file mode 100644 index 0000000..a61084b --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/facet_ranges.vm @@ -0,0 +1,23 @@ +#** + * Display facets based on ranges of values, AKA "Buckets" + * e.g.: ranges specified by &facet.range= + *# + +

+ Range Facets +

+ +#foreach ($field in $response.response.facet_counts.facet_ranges) + ## Hide facets without value + #if($field.value.counts.size() > 0) + #set($name = $field.key) + #set($display = $name) + #set($f = $field.value.counts) + #set($start = $field.value.start) + #set($end = $field.value.end) + #set($gap = $field.value.gap) + #set($before = $field.value.before) + #set($after = $field.value.after) + #display_facet_range($f, $display, $name, $start, $end, $gap, $before, $after) + #end ## end if has any values +#end ## end for each facet range diff --git a/zookeeper/solr/collection1/conf/velocity/facets.vm b/zookeeper/solr/collection1/conf/velocity/facets.vm new file mode 100644 index 0000000..55d40c9 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/facets.vm @@ -0,0 +1,10 @@ +#** + * Overall Facet display block + * Invokes the 4 facet and 1 cluster template + *# + +#parse('facet_fields.vm') +#parse('facet_queries.vm') +#parse('facet_ranges.vm') +#parse('facet_pivot.vm') +#parse('cluster.vm') diff --git a/zookeeper/solr/collection1/conf/velocity/footer.vm b/zookeeper/solr/collection1/conf/velocity/footer.vm new file mode 100644 index 0000000..0604c34 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/footer.vm @@ -0,0 +1,43 @@ +#** + * Render the bottom section of the page visible to users + *# + +
+
+ Options: + + #if($request.params.get('debugQuery')) + + disable debug + #else + + enable debug + #end + - + #if($annotate) + + disable annotation + #else + + enable annotation + #end + - + + XML results + +
+ +
+ Generated by VelocityResponseWriter +
+
+ Documentation: + Solr Home Page, + Solr Wiki +
+
+ Disclaimer: + The locations displayed in this demonstration are purely fictional. + It is more than likely that no store with the items listed actually + exists at that location! +
diff --git a/zookeeper/solr/collection1/conf/velocity/head.vm b/zookeeper/solr/collection1/conf/velocity/head.vm new file mode 100644 index 0000000..d1f6ee6 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/head.vm @@ -0,0 +1,35 @@ +#** + * Provide elements for the section of the HTML document + *# + + ## An example of using an arbitrary request parameter + #param('title') + + + + + + + + + diff --git a/zookeeper/solr/collection1/conf/velocity/header.vm b/zookeeper/solr/collection1/conf/velocity/header.vm new file mode 100644 index 0000000..6866047 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/header.vm @@ -0,0 +1,7 @@ +#** + * Render the top section of the page visible to users + *# + + diff --git a/zookeeper/solr/collection1/conf/velocity/hit.vm b/zookeeper/solr/collection1/conf/velocity/hit.vm new file mode 100644 index 0000000..a9c11f4 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/hit.vm @@ -0,0 +1,25 @@ +#** + * Called for each matching document but then + * calls one of product_doc, join_doc or richtext_doc + * depending on which fields the doc has + *# + +#set($docId = $doc.getFieldValue('id')) + +
+ + ## Has a "name" field ? + #if($doc.getFieldValue('name')) + #parse("product_doc.vm") + + ## Has a "compName_s" field ? + #elseif($doc.getFieldValue('compName_s')) + #parse("join_doc.vm") + + ## Fallback to richtext_doc + #else + #parse("richtext_doc.vm") + + #end + +
diff --git a/zookeeper/solr/collection1/conf/velocity/hit_grouped.vm b/zookeeper/solr/collection1/conf/velocity/hit_grouped.vm new file mode 100644 index 0000000..5297f1e --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/hit_grouped.vm @@ -0,0 +1,43 @@ +#** + * Display grouped results + *# + +
+ +
+ $grouping.key +
+ +
+ Total Matches in Group: $grouping.value.matches +
+ +
## list of groups + + #foreach ($group in $grouping.value.groups) +
+ #if($group.groupValue)$group.groupValue#{else}No group#end + + ($group.doclist.numFound) + +
+ +
+ #foreach ($doc in $group.doclist) + #set($docId = $doc.getFieldValue('id')) + #if($doc.getFieldValue('name')) + #parse("product_doc.vm") + #elseif($doc.getFieldValue('compName_s')) + #parse("join_doc.vm") + #else + #parse("richtext_doc.vm") + #end + #end +
+ + #end ## end of foreach group in grouping.value.groups +
## div tag for entire list of groups + +
## end of div class=result-document diff --git a/zookeeper/solr/collection1/conf/velocity/hit_plain.vm b/zookeeper/solr/collection1/conf/velocity/hit_plain.vm new file mode 100644 index 0000000..193439b --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/hit_plain.vm @@ -0,0 +1,25 @@ +#** + * An extremely plain / debug version of hit.vm + *# + + + ## For each field + #foreach( $fieldName in $doc.fieldNames ) + ## For each value + #foreach( $value in $doc.getFieldValues($fieldName) ) + + ## Field Name + + ## Field Value(s) + + + #end ## end for each value + #end ## end for each field +
+ #if( $foreach.count == 1 ) + $fieldName: + #end + + $esc.html($value)
+
+
diff --git a/zookeeper/solr/collection1/conf/velocity/join_doc.vm b/zookeeper/solr/collection1/conf/velocity/join_doc.vm new file mode 100644 index 0000000..9956012 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/join_doc.vm @@ -0,0 +1,20 @@ +#** + * Display documents that are joined to other documents + *# + +
+ #field('compName_s') +
+ +
+ Id: #field('id') + (company-details document for + join + ) +
+ +
+ Address: #field('address_s') +
+ +#parse('debug.vm') diff --git a/zookeeper/solr/collection1/conf/velocity/jquery.autocomplete.css b/zookeeper/solr/collection1/conf/velocity/jquery.autocomplete.css new file mode 100644 index 0000000..91b6228 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/jquery.autocomplete.css @@ -0,0 +1,48 @@ +.ac_results { + padding: 0px; + border: 1px solid black; + background-color: white; + overflow: hidden; + z-index: 99999; +} + +.ac_results ul { + width: 100%; + list-style-position: outside; + list-style: none; + padding: 0; + margin: 0; +} + +.ac_results li { + margin: 0px; + padding: 2px 5px; + cursor: default; + display: block; + /* + if width will be 100% horizontal scrollbar will apear + when scroll mode will be used + */ + /*width: 100%;*/ + font: menu; + font-size: 12px; + /* + it is very important, if line-height not setted or setted + in relative units scroll will be broken in firefox + */ + line-height: 16px; + overflow: hidden; +} + +.ac_loading { + background: white url('indicator.gif') right center no-repeat; +} + +.ac_odd { + background-color: #eee; +} + +.ac_over { + background-color: #0A246A; + color: white; +} diff --git a/zookeeper/solr/collection1/conf/velocity/jquery.autocomplete.js b/zookeeper/solr/collection1/conf/velocity/jquery.autocomplete.js new file mode 100644 index 0000000..09bb376 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/jquery.autocomplete.js @@ -0,0 +1,763 @@ +/* + * Autocomplete - jQuery plugin 1.1pre + * + * Copyright (c) 2007 Dylan Verheul, Dan G. 
Switzer, Anjesh Tuladhar, Jörn Zaefferer + * + * Dual licensed under the MIT and GPL licenses: + * http://www.opensource.org/licenses/mit-license.php + * http://www.gnu.org/licenses/gpl.html + * + * Revision: $Id: jquery.autocomplete.js 5785 2008-07-12 10:37:33Z joern.zaefferer $ + * + */ + +;(function($) { + +$.fn.extend({ + autocomplete: function(urlOrData, options) { + var isUrl = typeof urlOrData == "string"; + options = $.extend({}, $.Autocompleter.defaults, { + url: isUrl ? urlOrData : null, + data: isUrl ? null : urlOrData, + delay: isUrl ? $.Autocompleter.defaults.delay : 10, + max: options && !options.scroll ? 10 : 150 + }, options); + + // if highlight is set to false, replace it with a do-nothing function + options.highlight = options.highlight || function(value) { return value; }; + + // if the formatMatch option is not specified, then use formatItem for backwards compatibility + options.formatMatch = options.formatMatch || options.formatItem; + + return this.each(function() { + new $.Autocompleter(this, options); + }); + }, + result: function(handler) { + return this.bind("result", handler); + }, + search: function(handler) { + return this.trigger("search", [handler]); + }, + flushCache: function() { + return this.trigger("flushCache"); + }, + setOptions: function(options){ + return this.trigger("setOptions", [options]); + }, + unautocomplete: function() { + return this.trigger("unautocomplete"); + } +}); + +$.Autocompleter = function(input, options) { + + var KEY = { + UP: 38, + DOWN: 40, + DEL: 46, + TAB: 9, + RETURN: 13, + ESC: 27, + COMMA: 188, + PAGEUP: 33, + PAGEDOWN: 34, + BACKSPACE: 8 + }; + + // Create $ object for input element + var $input = $(input).attr("autocomplete", "off").addClass(options.inputClass); + + var timeout; + var previousValue = ""; + var cache = $.Autocompleter.Cache(options); + var hasFocus = 0; + var lastKeyPressCode; + var config = { + mouseDownOnSelect: false + }; + var select = $.Autocompleter.Select(options, input, 
selectCurrent, config); + + var blockSubmit; + + // prevent form submit in opera when selecting with return key + $.browser.opera && $(input.form).bind("submit.autocomplete", function() { + if (blockSubmit) { + blockSubmit = false; + return false; + } + }); + + // only opera doesn't trigger keydown multiple times while pressed, others don't work with keypress at all + $input.bind(($.browser.opera ? "keypress" : "keydown") + ".autocomplete", function(event) { + // track last key pressed + lastKeyPressCode = event.keyCode; + switch(event.keyCode) { + + case KEY.UP: + event.preventDefault(); + if ( select.visible() ) { + select.prev(); + } else { + onChange(0, true); + } + break; + + case KEY.DOWN: + event.preventDefault(); + if ( select.visible() ) { + select.next(); + } else { + onChange(0, true); + } + break; + + case KEY.PAGEUP: + event.preventDefault(); + if ( select.visible() ) { + select.pageUp(); + } else { + onChange(0, true); + } + break; + + case KEY.PAGEDOWN: + event.preventDefault(); + if ( select.visible() ) { + select.pageDown(); + } else { + onChange(0, true); + } + break; + + // matches also semicolon + case options.multiple && $.trim(options.multipleSeparator) == "," && KEY.COMMA: + case KEY.TAB: + case KEY.RETURN: + if( selectCurrent() ) { + // stop default to prevent a form submit, Opera needs special handling + event.preventDefault(); + blockSubmit = true; + return false; + } + break; + + case KEY.ESC: + select.hide(); + break; + + default: + clearTimeout(timeout); + timeout = setTimeout(onChange, options.delay); + break; + } + }).focus(function(){ + // track whether the field has focus, we shouldn't process any + // results if the field no longer has focus + hasFocus++; + }).blur(function() { + hasFocus = 0; + if (!config.mouseDownOnSelect) { + hideResults(); + } + }).click(function() { + // show select when clicking in a focused field + if ( hasFocus++ > 1 && !select.visible() ) { + onChange(0, true); + } + }).bind("search", function() { + // 
TODO why not just specifying both arguments? + var fn = (arguments.length > 1) ? arguments[1] : null; + function findValueCallback(q, data) { + var result; + if( data && data.length ) { + for (var i=0; i < data.length; i++) { + if( data[i].result.toLowerCase() == q.toLowerCase() ) { + result = data[i]; + break; + } + } + } + if( typeof fn == "function" ) fn(result); + else $input.trigger("result", result && [result.data, result.value]); + } + $.each(trimWords($input.val()), function(i, value) { + request(value, findValueCallback, findValueCallback); + }); + }).bind("flushCache", function() { + cache.flush(); + }).bind("setOptions", function() { + $.extend(options, arguments[1]); + // if we've updated the data, repopulate + if ( "data" in arguments[1] ) + cache.populate(); + }).bind("unautocomplete", function() { + select.unbind(); + $input.unbind(); + $(input.form).unbind(".autocomplete"); + }); + + + function selectCurrent() { + var selected = select.selected(); + if( !selected ) + return false; + + var v = selected.result; + previousValue = v; + + if ( options.multiple ) { + var words = trimWords($input.val()); + if ( words.length > 1 ) { + v = words.slice(0, words.length - 1).join( options.multipleSeparator ) + options.multipleSeparator + v; + } + v += options.multipleSeparator; + } + + $input.val(v); + hideResultsNow(); + $input.trigger("result", [selected.data, selected.value]); + return true; + } + + function onChange(crap, skipPrevCheck) { + if( lastKeyPressCode == KEY.DEL ) { + select.hide(); + return; + } + + var currentValue = $input.val(); + + if ( !skipPrevCheck && currentValue == previousValue ) + return; + + previousValue = currentValue; + + currentValue = lastWord(currentValue); + if ( currentValue.length >= options.minChars) { + $input.addClass(options.loadingClass); + if (!options.matchCase) + currentValue = currentValue.toLowerCase(); + request(currentValue, receiveData, hideResultsNow); + } else { + stopLoading(); + select.hide(); + } + }; + + 
function trimWords(value) { + if ( !value ) { + return [""]; + } + var words = value.split( options.multipleSeparator ); + var result = []; + $.each(words, function(i, value) { + if ( $.trim(value) ) + result[i] = $.trim(value); + }); + return result; + } + + function lastWord(value) { + if ( !options.multiple ) + return value; + var words = trimWords(value); + return words[words.length - 1]; + } + + // fills in the input box w/the first match (assumed to be the best match) + // q: the term entered + // sValue: the first matching result + function autoFill(q, sValue){ + // autofill in the complete box w/the first match as long as the user hasn't entered in more data + // if the last user key pressed was backspace, don't autofill + if( options.autoFill && (lastWord($input.val()).toLowerCase() == q.toLowerCase()) && lastKeyPressCode != KEY.BACKSPACE ) { + // fill in the value (keep the case the user has typed) + $input.val($input.val() + sValue.substring(lastWord(previousValue).length)); + // select the portion of the value not typed by the user (so the next character will erase) + $.Autocompleter.Selection(input, previousValue.length, previousValue.length + sValue.length); + } + }; + + function hideResults() { + clearTimeout(timeout); + timeout = setTimeout(hideResultsNow, 200); + }; + + function hideResultsNow() { + var wasVisible = select.visible(); + select.hide(); + clearTimeout(timeout); + stopLoading(); + if (options.mustMatch) { + // call search and run callback + $input.search( + function (result){ + // if no value found, clear the input box + if( !result ) { + if (options.multiple) { + var words = trimWords($input.val()).slice(0, -1); + $input.val( words.join(options.multipleSeparator) + (words.length ? 
options.multipleSeparator : "") ); + } + else + $input.val( "" ); + } + } + ); + } + if (wasVisible) + // position cursor at end of input field + $.Autocompleter.Selection(input, input.value.length, input.value.length); + }; + + function receiveData(q, data) { + if ( data && data.length && hasFocus ) { + stopLoading(); + select.display(data, q); + autoFill(q, data[0].value); + select.show(); + } else { + hideResultsNow(); + } + }; + + function request(term, success, failure) { + if (!options.matchCase) + term = term.toLowerCase(); + var data = cache.load(term); + data = null; // Avoid buggy cache and go to Solr every time + // receive the cached data + if (data && data.length) { + success(term, data); + // if an AJAX url has been supplied, try loading the data now + } else if( (typeof options.url == "string") && (options.url.length > 0) ){ + + var extraParams = { + timestamp: +new Date() + }; + $.each(options.extraParams, function(key, param) { + extraParams[key] = typeof param == "function" ? 
param() : param; + }); + + $.ajax({ + // try to leverage ajaxQueue plugin to abort previous requests + mode: "abort", + // limit abortion to this input + port: "autocomplete" + input.name, + dataType: options.dataType, + url: options.url, + data: $.extend({ + q: lastWord(term), + limit: options.max + }, extraParams), + success: function(data) { + var parsed = options.parse && options.parse(data) || parse(data); + cache.add(term, parsed); + success(term, parsed); + } + }); + } else { + // if we have a failure, we need to empty the list -- this prevents the [TAB] key from selecting the last successful match + select.emptyList(); + failure(term); + } + }; + + function parse(data) { + var parsed = []; + var rows = data.split("\n"); + for (var i=0; i < rows.length; i++) { + var row = $.trim(rows[i]); + if (row) { + row = row.split("|"); + parsed[parsed.length] = { + data: row, + value: row[0], + result: options.formatResult && options.formatResult(row, row[0]) || row[0] + }; + } + } + return parsed; + }; + + function stopLoading() { + $input.removeClass(options.loadingClass); + }; + +}; + +$.Autocompleter.defaults = { + inputClass: "ac_input", + resultsClass: "ac_results", + loadingClass: "ac_loading", + minChars: 1, + delay: 400, + matchCase: false, + matchSubset: true, + matchContains: false, + cacheLength: 10, + max: 100, + mustMatch: false, + extraParams: {}, + selectFirst: false, + formatItem: function(row) { return row[0]; }, + formatMatch: null, + autoFill: false, + width: 0, + multiple: false, + multipleSeparator: ", ", + highlight: function(value, term) { + return value.replace(new RegExp("(?![^&;]+;)(?!<[^<>]*)(" + term.replace(/([\^\$\(\)\[\]\{\}\*\.\+\?\|\\])/gi, "\\$1") + ")(?![^<>]*>)(?![^&;]+;)", "gi"), "$1"); + }, + scroll: true, + scrollHeight: 180 +}; + +$.Autocompleter.Cache = function(options) { + + var data = {}; + var length = 0; + + function matchSubset(s, sub) { + if (!options.matchCase) + s = s.toLowerCase(); + var i = s.indexOf(sub); + if 
(options.matchContains == "word"){ + i = s.toLowerCase().search("\\b" + sub.toLowerCase()); + } + if (i == -1) return false; + return i == 0 || options.matchContains; + }; + + function add(q, value) { + if (length > options.cacheLength){ + flush(); + } + if (!data[q]){ + length++; + } + data[q] = value; + } + + function populate(){ + if( !options.data ) return false; + // track the matches + var stMatchSets = {}, + nullData = 0; + + // no url was specified, we need to adjust the cache length to make sure it fits the local data store + if( !options.url ) options.cacheLength = 1; + + // track all options for minChars = 0 + stMatchSets[""] = []; + + // loop through the array and create a lookup structure + for ( var i = 0, ol = options.data.length; i < ol; i++ ) { + var rawValue = options.data[i]; + // if rawValue is a string, make an array otherwise just reference the array + rawValue = (typeof rawValue == "string") ? [rawValue] : rawValue; + + var value = options.formatMatch(rawValue, i+1, options.data.length); + if ( value === false ) + continue; + + var firstChar = value.charAt(0).toLowerCase(); + // if no lookup array for this character exists, look it up now + if( !stMatchSets[firstChar] ) + stMatchSets[firstChar] = []; + + // if the match is a string + var row = { + value: value, + data: rawValue, + result: options.formatResult && options.formatResult(rawValue) || value + }; + + // push the current match into the set list + stMatchSets[firstChar].push(row); + + // keep track of minChars zero items + if ( nullData++ < options.max ) { + stMatchSets[""].push(row); + } + }; + + // add the data items to the cache + $.each(stMatchSets, function(i, value) { + // increase the cache size + options.cacheLength++; + // add to the cache + add(i, value); + }); + } + + // populate any existing data + setTimeout(populate, 25); + + function flush(){ + data = {}; + length = 0; + } + + return { + flush: flush, + add: add, + populate: populate, + load: function(q) { + if 
(!options.cacheLength || !length) + return null; + /* + * if dealing w/local data and matchContains than we must make sure + * to loop through all the data collections looking for matches + */ + if( !options.url && options.matchContains ){ + // track all matches + var csub = []; + // loop through all the data grids for matches + for( var k in data ){ + // don't search through the stMatchSets[""] (minChars: 0) cache + // this prevents duplicates + if( k.length > 0 ){ + var c = data[k]; + $.each(c, function(i, x) { + // if we've got a match, add it to the array + if (matchSubset(x.value, q)) { + csub.push(x); + } + }); + } + } + return csub; + } else + // if the exact item exists, use it + if (data[q]){ + return data[q]; + } else + if (options.matchSubset) { + for (var i = q.length - 1; i >= options.minChars; i--) { + var c = data[q.substr(0, i)]; + if (c) { + var csub = []; + $.each(c, function(i, x) { + if (matchSubset(x.value, q)) { + csub[csub.length] = x; + } + }); + return csub; + } + } + } + return null; + } + }; +}; + +$.Autocompleter.Select = function (options, input, select, config) { + var CLASSES = { + ACTIVE: "ac_over" + }; + + var listItems, + active = -1, + data, + term = "", + needsInit = true, + element, + list; + + // Create results + function init() { + if (!needsInit) + return; + element = $("
") + .hide() + .addClass(options.resultsClass) + .css("position", "absolute") + .appendTo(document.body); + + list = $("
    ").appendTo(element).mouseover( function(event) { + if(target(event).nodeName && target(event).nodeName.toUpperCase() == 'LI') { + active = $("li", list).removeClass(CLASSES.ACTIVE).index(target(event)); + $(target(event)).addClass(CLASSES.ACTIVE); + } + }).click(function(event) { + $(target(event)).addClass(CLASSES.ACTIVE); + select(); + // TODO provide option to avoid setting focus again after selection? useful for cleanup-on-focus + input.focus(); + return false; + }).mousedown(function() { + config.mouseDownOnSelect = true; + }).mouseup(function() { + config.mouseDownOnSelect = false; + }); + + if( options.width > 0 ) + element.css("width", options.width); + + needsInit = false; + } + + function target(event) { + var element = event.target; + while(element && element.tagName != "LI") + element = element.parentNode; + // more fun with IE, sometimes event.target is empty, just ignore it then + if(!element) + return []; + return element; + } + + function moveSelect(step) { + listItems.slice(active, active + 1).removeClass(CLASSES.ACTIVE); + movePosition(step); + var activeItem = listItems.slice(active, active + 1).addClass(CLASSES.ACTIVE); + if(options.scroll) { + var offset = 0; + listItems.slice(0, active).each(function() { + offset += this.offsetHeight; + }); + if((offset + activeItem[0].offsetHeight - list.scrollTop()) > list[0].clientHeight) { + list.scrollTop(offset + activeItem[0].offsetHeight - list.innerHeight()); + } else if(offset < list.scrollTop()) { + list.scrollTop(offset); + } + } + }; + + function movePosition(step) { + active += step; + if (active < 0) { + active = listItems.size() - 1; + } else if (active >= listItems.size()) { + active = 0; + } + } + + function limitNumberOfItems(available) { + return options.max && options.max < available + ? 
options.max + : available; + } + + function fillList() { + list.empty(); + var max = limitNumberOfItems(data.length); + for (var i=0; i < max; i++) { + if (!data[i]) + continue; + var formatted = options.formatItem(data[i].data, i+1, max, data[i].value, term); + if ( formatted === false ) + continue; + var li = $("
  • ").html( options.highlight(formatted, term) ).addClass(i%2 == 0 ? "ac_even" : "ac_odd").appendTo(list)[0]; + $.data(li, "ac_data", data[i]); + } + listItems = list.find("li"); + if ( options.selectFirst ) { + listItems.slice(0, 1).addClass(CLASSES.ACTIVE); + active = 0; + } + // apply bgiframe if available + if ( $.fn.bgiframe ) + list.bgiframe(); + } + + return { + display: function(d, q) { + init(); + data = d; + term = q; + fillList(); + }, + next: function() { + moveSelect(1); + }, + prev: function() { + moveSelect(-1); + }, + pageUp: function() { + if (active != 0 && active - 8 < 0) { + moveSelect( -active ); + } else { + moveSelect(-8); + } + }, + pageDown: function() { + if (active != listItems.size() - 1 && active + 8 > listItems.size()) { + moveSelect( listItems.size() - 1 - active ); + } else { + moveSelect(8); + } + }, + hide: function() { + element && element.hide(); + listItems && listItems.removeClass(CLASSES.ACTIVE); + active = -1; + }, + visible : function() { + return element && element.is(":visible"); + }, + current: function() { + return this.visible() && (listItems.filter("." + CLASSES.ACTIVE)[0] || options.selectFirst && listItems[0]); + }, + show: function() { + var offset = $(input).offset(); + element.css({ + width: typeof options.width == "string" || options.width > 0 ? options.width : $(input).width(), + top: offset.top + input.offsetHeight, + left: offset.left + }).show(); + if(options.scroll) { + list.scrollTop(0); + list.css({ + maxHeight: options.scrollHeight, + overflow: 'auto' + }); + + if($.browser.msie && typeof document.body.style.maxHeight === "undefined") { + var listHeight = 0; + listItems.each(function() { + listHeight += this.offsetHeight; + }); + var scrollbarsVisible = listHeight > options.scrollHeight; + list.css('height', scrollbarsVisible ? 
options.scrollHeight : listHeight ); + if (!scrollbarsVisible) { + // IE doesn't recalculate width when scrollbar disappears + listItems.width( list.width() - parseInt(listItems.css("padding-left")) - parseInt(listItems.css("padding-right")) ); + } + } + + } + }, + selected: function() { + var selected = listItems && listItems.filter("." + CLASSES.ACTIVE).removeClass(CLASSES.ACTIVE); + return selected && selected.length && $.data(selected[0], "ac_data"); + }, + emptyList: function (){ + list && list.empty(); + }, + unbind: function() { + element && element.remove(); + } + }; +}; + +$.Autocompleter.Selection = function(field, start, end) { + if( field.createTextRange ){ + var selRange = field.createTextRange(); + selRange.collapse(true); + selRange.moveStart("character", start); + selRange.moveEnd("character", end); + selRange.select(); + } else if( field.setSelectionRange ){ + field.setSelectionRange(start, end); + } else { + if( field.selectionStart ){ + field.selectionStart = start; + field.selectionEnd = end; + } + } + field.focus(); +}; + +})(jQuery); \ No newline at end of file diff --git a/zookeeper/solr/collection1/conf/velocity/layout.vm b/zookeeper/solr/collection1/conf/velocity/layout.vm new file mode 100644 index 0000000..50f4c1b --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/layout.vm @@ -0,0 +1,24 @@ +#** + * Overall HTML page layout + *# + + + + #parse("head.vm") + + + + +
    + #parse("tabs.vm") +
    +
    + $content +
    + + + diff --git a/zookeeper/solr/collection1/conf/velocity/main.css b/zookeeper/solr/collection1/conf/velocity/main.css new file mode 100644 index 0000000..0aed533 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/main.css @@ -0,0 +1,230 @@ +#admin{ + text-align: right; + vertical-align: top; +} + +#head{ + width: 100%; +} +.array-field { + border: 2px solid #474747; + background: #FFE9D8; + padding: 5px; + margin: 5px; +} + +.array-field-list li { + list-style: circle; + margin-left: 20px; +} + +.parsed_query_header { + font-family: Helvetica, Arial, sans-serif; + font-size: 10pt; + font-weight: bold; +} + +.parsed_query { + font-family: Courier, Courier New, monospaced; + font-size: 10pt; + font-weight: normal; +} + +body { + font-family: Helvetica, Arial, sans-serif; + font-size: 10pt; +} + +a { + color: #43a4b1; +} + +.navigators { + float: left; + margin: 5px; + margin-top: 0px; + width: 185px; + padding: 5px; + top: -20px; + position: relative; +} + +.tabs-bar { + padding: 5px; + width: 100%; + border: 1px solid; + border-width: 0px 0px 1px 0px; +} +.tab { + font-weight: bold; + padding: 5px; + margin: 0px 5px; + border: 1px solid; + background-color: #dddddd; + border-top-left-radius: 4px; + border-top-right-radius: 4px; +} +.tab:hover { + background: #FEC293; +} +.tab.selected { + background-color: #ffffff; + border-bottom: 1px solid #ffffff; +} + +.navigators h2 { + background: #FEC293; + padding: 2px 5px; +} + +.navigators ul { + list-style: none; + margin: 0; + margin-bottom: 5px; + margin-top: 5px; + padding-left: 10px; +} + +.navigators ul li { + color: #999; + padding: 2px; +} + + + +.facet-field { + font-weight: bold; +} + +.highlight { + color: white; + background-color: gray; + border: 1px black solid; +} + +.highlight-box { + margin-left: 15px; +} + +.field-name { + font-weight: bold; +} + +.highlighted-facet-field { + background: white; +} + +.constraints { + margin-top: 10px; +} + +#query-form{ + width: 80%; +} + + + +.query-box, 
.constraints { + padding: 5px; + margin: 5px; + font-weight: normal; + font-size: 24px; + letter-spacing: 0.08em; +} + +.query-box #q { + margin-left: 8px; + width: 60%; + height: 50px; + border: 1px solid #999; + font-size: 1em; + padding: 0.4em; +} + +.query-box { + +} + +.query-boost { + + top: 10px; + left: 50px; + position: relative; + font-size: 0.8em; +} + +.query-box .inputs{ + left: 180px; + position: relative; + +} + +#logo { + margin: 10px; + border-style: none; +} + +.pagination { + padding-left: 33%; + background: #eee; + margin: 5px; + margin-left: 210px; + padding-top: 5px; + padding-bottom: 5px; +} + +.result-document { + border: 1px solid #999; + padding: 5px; + margin: 5px; + margin-left: 210px; + margin-bottom: 15px; +} + +.result-document div{ + padding: 5px; +} + +.result-title{ + width:60%; +} + +.result-body{ + background: #ddd; +} + +.mlt{ + +} + +.map{ + float: right; + position: relative; + top: -25px; +} + +.result-document:nth-child(2n+1) { + background-color: #eee; +} + + +.selected-facet-field { + font-weight: bold; +} + +li.show { + list-style: disc; +} + +.group-value{ + font-weight: bold; +} + +.error { + color: white; + background-color: red; + left: 210px; + width:80%; + position: relative; + +} diff --git a/zookeeper/solr/collection1/conf/velocity/mime_type_lists.vm b/zookeeper/solr/collection1/conf/velocity/mime_type_lists.vm new file mode 100644 index 0000000..1468bbd --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/mime_type_lists.vm @@ -0,0 +1,68 @@ +#** + * Define some Mime-Types, short and long form + *# + +## MimeType to extension map for detecting file type +## and showing proper icon +## List of types match the icons in /solr/img/filetypes + +## Short MimeType Names +## Was called $supportedtypes +#set($supportedMimeTypes = 
"7z;ai;aiff;asc;audio;bin;bz2;c;cfc;cfm;chm;class;conf;cpp;cs;css;csv;deb;divx;doc;dot;eml;enc;file;gif;gz;hlp;htm;html;image;iso;jar;java;jpeg;jpg;js;lua;m;mm;mov;mp3;mpg;odc;odf;odg;odi;odp;ods;odt;ogg;pdf;pgp;php;pl;png;ppt;ps;py;ram;rar;rb;rm;rpm;rtf;sig;sql;swf;sxc;sxd;sxi;sxw;tar;tex;tgz;txt;vcf;video;vsd;wav;wma;wmv;xls;xml;xpi;xvid;zip") + +## Long Form: map MimeType headers to our Short names +## Was called $extMap +#set( $mimeExtensionsMap = { + "application/x-7z-compressed": "7z", + "application/postscript": "ai", + "application/pgp-signature": "asc", + "application/octet-stream": "bin", + "application/x-bzip2": "bz2", + "text/x-c": "c", + "application/vnd.ms-htmlhelp": "chm", + "application/java-vm": "class", + "text/css": "css", + "text/csv": "csv", + "application/x-debian-package": "deb", + "application/msword": "doc", + "message/rfc822": "eml", + "image/gif": "gif", + "application/winhlp": "hlp", + "text/html": "html", + "application/java-archive": "jar", + "text/x-java-source": "java", + "image/jpeg": "jpeg", + "application/javascript": "js", + "application/vnd.oasis.opendocument.chart": "odc", + "application/vnd.oasis.opendocument.formula": "odf", + "application/vnd.oasis.opendocument.graphics": "odg", + "application/vnd.oasis.opendocument.image": "odi", + "application/vnd.oasis.opendocument.presentation": "odp", + "application/vnd.oasis.opendocument.spreadsheet": "ods", + "application/vnd.oasis.opendocument.text": "odt", + "application/pdf": "pdf", + "application/pgp-encrypted": "pgp", + "image/png": "png", + "application/vnd.ms-powerpoint": "ppt", + "audio/x-pn-realaudio": "ram", + "application/x-rar-compressed": "rar", + "application/vnd.rn-realmedia": "rm", + "application/rtf": "rtf", + "application/x-shockwave-flash": "swf", + "application/vnd.sun.xml.calc": "sxc", + "application/vnd.sun.xml.draw": "sxd", + "application/vnd.sun.xml.impress": "sxi", + "application/vnd.sun.xml.writer": "sxw", + "application/x-tar": "tar", + "application/x-tex": 
"tex", + "text/plain": "txt", + "text/x-vcard": "vcf", + "application/vnd.visio": "vsd", + "audio/x-wav": "wav", + "audio/x-ms-wma": "wma", + "video/x-ms-wmv": "wmv", + "application/vnd.ms-excel": "xls", + "application/xml": "xml", + "application/x-xpinstall": "xpi", + "application/zip": "zip" +}) diff --git a/zookeeper/solr/collection1/conf/velocity/pagination_bottom.vm b/zookeeper/solr/collection1/conf/velocity/pagination_bottom.vm new file mode 100644 index 0000000..71b8bdf --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/pagination_bottom.vm @@ -0,0 +1,22 @@ +#** + * Paging and Statistics at bottom of results + *# + +## Usually rendered in pagination div tag + +#if($response.response.get('grouped')) + ## pass +#else + + #link_to_previous_page("previous") + + $page.results_found + results found. + + Page $page.current_page_number + of $page.page_count + + #link_to_next_page("next") + +#end +
    diff --git a/zookeeper/solr/collection1/conf/velocity/pagination_top.vm b/zookeeper/solr/collection1/conf/velocity/pagination_top.vm new file mode 100644 index 0000000..e0ac8ac --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/pagination_top.vm @@ -0,0 +1,29 @@ +#** + * Paging and Statistics at top of results + *# + +## Usually rendered in pagination div tag + +## Grouped Results / Not Paginated +#if($response.response.get('grouped')) + + + + $response.response.get('grouped').size() group(s) + + found in ${response.responseHeader.QTime} ms + + +## Regular Results / Use Paging Links if needed +#else + + + $page.results_found + results found in + ${response.responseHeader.QTime} ms + + + Page $page.current_page_number + of $page.page_count + +#end ## end else non-grouped results, normal pagination diff --git a/zookeeper/solr/collection1/conf/velocity/product_doc.vm b/zookeeper/solr/collection1/conf/velocity/product_doc.vm new file mode 100644 index 0000000..c878d8c --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/product_doc.vm @@ -0,0 +1,32 @@ +#** + * Render a hit representing a Product + * assumed to have a field called "name" + *# + +
    #field('name') #if($params.getBool('mlt', false) == false)More Like This#end
    +##do we have a physical store for this product +#set($store = $doc.getFieldValue('store')) +#if($store)#end +
    Id: #field('id')
    +
    Price: #field('price_c')
    +
    Features: #field('features')
    +
    In Stock: #field('inStock')
    +
    + #set($mlt = $mltResults.get($docId)) + #set($mltOn = $params.getBool('mlt')) + #if($mltOn == true)
    Similar Items
    #end + #if ($mltOn && $mlt && $mlt.size() > 0) +
      + #foreach($mltHit in $mlt) + #set($mltId = $mltHit.getFieldValue('id')) +
    • Name: $mltHit.getFieldValue('name')
      +
      Price: $!number.currency($mltHit.getFieldValue('price')) In Stock: $mltHit.getFieldValue('inStock')
      + +
    • + #end +
    + #elseif($mltOn && $mlt.size() == 0) +
    No Similar Items Found
    + #end +
    +#parse('debug.vm') diff --git a/zookeeper/solr/collection1/conf/velocity/query.vm b/zookeeper/solr/collection1/conf/velocity/query.vm new file mode 100644 index 0000000..ddbab3f --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/query.vm @@ -0,0 +1,42 @@ +
    +
    +
    + Find: +
    Boost by Price + #parse("querySpatial.vm") + #parse("queryGroup.vm") +
    +
    + + #if($request.params.get('debugQuery')) + + #end + #if($annotate == true) + + #end + #foreach($fq in $request.params.getParams('fq')) + #if ($fq != "{!bbox}") + + #end + #end +
    + #foreach($fq in $params.getParams('fq')) + #set($previous_fq_count=$velocityCount - 1) + #if($fq != '') + > $fq + #end + #end +
    +
    + #if($request.params.get('debugQuery')) + toggle parsed query + + #end + #set($queryOpts = $request.params.get("queryOpts")) + #if($queryOpts && $queryOpts != "") + + #end +
    +
    + +
    diff --git a/zookeeper/solr/collection1/conf/velocity/query_form.vm b/zookeeper/solr/collection1/conf/velocity/query_form.vm new file mode 100644 index 0000000..70a0af2 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/query_form.vm @@ -0,0 +1,64 @@ +#** + * Renders the main query form + *# + +
    +
    + +
    + + Find: + + + + +
    + + + Boost by Price + + + #parse("query_spatial.vm") + #parse("query_group.vm") +
    +
    + + #if($request.params.get('debugQuery')) + + #end + #if($annotate == true) + + #end + #foreach($fq in $request.params.getParams('fq')) + #if ($fq != "{!bbox}") + + #end + #end + +
    + #foreach($fq in $params.getParams('fq')) + #set($previous_fq_count=$velocityCount - 1) + #if($fq != '') + > + $fq + #end + #end +
    + +
    + #if($request.params.get('debugQuery')) + toggle parsed query + + #end + #set($queryOpts = $request.params.get("queryOpts")) + #if($queryOpts && $queryOpts != "") + + #end +
    + +
    +
    diff --git a/zookeeper/solr/collection1/conf/velocity/query_group.vm b/zookeeper/solr/collection1/conf/velocity/query_group.vm new file mode 100644 index 0000000..42e5457 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/query_group.vm @@ -0,0 +1,43 @@ +#** + * Query settings for grouping by fields, + * e.g.: Manufacturer or Popularity + *# + +#set($queryOpts = $params.get("queryOpts")) + +#if($queryOpts == "group") +
    + #set($groupF = $request.params.get('group.field')) + + + + + +
    + +#end diff --git a/zookeeper/solr/collection1/conf/velocity/query_spatial.vm b/zookeeper/solr/collection1/conf/velocity/query_spatial.vm new file mode 100644 index 0000000..2bc2044 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/query_spatial.vm @@ -0,0 +1,75 @@ +#** + * Query logic for selecting location / Geospatial search + *# + +#set($queryOpts = $params.get("queryOpts")) + +#if($queryOpts == "spatial") + +
    + + #set($loc = $request.params.get('pt')) + ## Normalize first trip through to "none" because + ## an empty string generates an error message later on + #if( ! $loc ) + #set( $loc = "none" ) + #end + + #set($dist = $request.params.get('d', "10")) + + ## Cities for The Select List + #set( $cities = { + "none": "No Filter", + "45.17614,-93.87341": "Buffalo, MN", + "37.7752,-100.0232": "Dodge City, KS", + "35.0752,-97.032": "Oklahoma City, OK", + "37.7752,-122.4232": "San Francisco CA" + }) + + + + + Distance (KM): + + + + + + + +
    + + + +#end diff --git a/zookeeper/solr/collection1/conf/velocity/results_list.vm b/zookeeper/solr/collection1/conf/velocity/results_list.vm new file mode 100644 index 0000000..f73532b --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/results_list.vm @@ -0,0 +1,22 @@ +#** + * Render the main Results List + *# + +## Usually displayed inside
    + +#if($response.response.get('grouped')) + + #foreach($grouping in $response.response.get('grouped')) + #parse("hit_grouped.vm") + #end + +#else + + #foreach($doc in $response.results) + #parse("hit.vm") + ## Can get an extremely simple view of the doc + ## which might be nicer for debugging + ##parse("hit_plain.vm") + #end + +#end diff --git a/zookeeper/solr/collection1/conf/velocity/richtext_doc.vm b/zookeeper/solr/collection1/conf/velocity/richtext_doc.vm new file mode 100644 index 0000000..9e8d6cb --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/richtext_doc.vm @@ -0,0 +1,153 @@ +#** + * Render a complex document in the results list + *# + +## Load Mime-Type List and Mapping +#parse('mime_type_lists.vm') +## Sets: +## * supportedMimeTypes, AKA supportedtypes +## * mimeExtensionsMap, AKA extMap + +## Title +#if($doc.getFieldValue('title')) + #set($title = $esc.html($doc.getFirstValue('title'))) +#else + #set($title = "["+$doc.getFieldValue('id')+"]") +#end + +## URL +#if($doc.getFieldValue('url')) + #set($url = $doc.getFieldValue('url')) +#elseif($doc.getFieldValue('resourcename')) + #set($url = "file:///$doc.getFieldValue('resourcename')") +#else + #set($url = "$doc.getFieldValue('id')") +#end + +## Sort out Mime-Type +#set($ct = $list.get($doc.getFirstValue('content_type').split(";"),0)) +#set($filename = $doc.getFieldValue('resourcename')) +#set($filetype = false) +#set($filetype = $mimeExtensionsMap.get($ct)) + +## TODO: falling back to file extension is convenient, +## except when you don't have an icon for that extension +## example "application/vnd.openxmlformats-officedocument.wordprocessingml.document" +## document with a .docx extension. +## It'd be nice to fall back to an "unknown" or the existing "file" type +## We sort of do this below, but only if the filename has no extension +## (anything after the last dot). 
+ +#if(!$filetype) + #set($filetype = $filename.substring($filename.lastIndexOf(".")).substring(1)) +#end + +## #if(!$filetype) +## #set($filetype = "file") +## #end +## #if(!$supportedMimeTypes.contains($filetype)) +## #set($filetype = "file") +## #end + +## Row 1: Icon and Title and mlt link +
    + ## Icon + ## Small file type icons from http://www.splitbrain.org/projects/file_icons (public domain) + + + ## Title, hyperlinked + + $title + + ## Link for MLT / More Like This / Find Similar + + #if($params.getBool('mlt', false) == false) + + More Like This + #end + + +
    + +## Row 2?: ID / URL +
    + Id: #field('id') +
    + +## Resource Name +
    + #if($doc.getFieldValue('resourcename')) + Resource name: $filename + #elseif($url) + URL: $url + #end + #if($ct) + ($ct) + #end +
    + +## Author +#if($doc.getFieldValue('author')) +
    + Author: #field('author') +
    +#end + +## Last_Modified Date +#if($doc.getFieldValue('last_modified')) +
    + last-modified: + #field('last_modified') +
    +#end + +## Main content of doc +
    + #field('content') +
    + +## Display Similar Documents / MLT = More Like This +
    + #set($mlt = $mltResults.get($docId)) + #set($mltOn = $params.getBool('mlt')) + #if($mltOn == true) +
    + Similar Items +
    + #end + ## If has MLT enabled An Entries to show + #if ($mltOn && $mlt && $mlt.size() > 0) +
      + #foreach($mltHit in $mlt) + #set($mltId = $mltHit.getFieldValue('id')) +
    • + +
      + + Title: + + $mltHit.getFieldValue('title') +
      +
      + + Author: + + $mltHit.getFieldValue('author') + + Description: + + $mltHit.getFieldValue('description') +
      +
    • + #end ## end for each mltHit in $mlt +
    + ## Else MLT Enabled but no mlt results for this query + #elseif($mltOn && $mlt.size() == 0) +
    No Similar Items Found
    + #end +
    ## div class=mlt + +#parse('debug.vm') diff --git a/zookeeper/solr/collection1/conf/velocity/suggest.vm b/zookeeper/solr/collection1/conf/velocity/suggest.vm new file mode 100644 index 0000000..dae6b83 --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/suggest.vm @@ -0,0 +1,8 @@ +#** + * Provides cynamic spelling suggestions + * as you type in the search form + *# + +#foreach($t in $response.response.terms.name) + $t.key +#end diff --git a/zookeeper/solr/collection1/conf/velocity/tabs.vm b/zookeeper/solr/collection1/conf/velocity/tabs.vm new file mode 100644 index 0000000..da19cbc --- /dev/null +++ b/zookeeper/solr/collection1/conf/velocity/tabs.vm @@ -0,0 +1,50 @@ +#** + * Provides navigation/access to Advanced search options + * Usually displayed near the top of the page + *# + +##TODO: Make some nice tabs here + +#set($queryOpts = $params.get("queryOpts")) + +
    + + Type of Search: + + ##queryOpts=$queryOpts + + ## return to Simple Search + ##set( $selected = ($queryOpts && $queryOpts != "") ) + #set( $selected = ! $queryOpts ) + + #if($selected) + Simple + #else + + Simple + #end + + + ## GEO-Spatial / Location Based + #set( $selected = ($queryOpts == "spatial") ) + + #if($selected) + Spatial + #else + + Spatial + #end + + + ## Group By Field + #set( $selected = ($queryOpts == "group") ) + + #if($selected) + Group By + #else + + Group By + #end + + +
    diff --git a/zookeeper/solr/collection1/conf/xslt/example.xsl b/zookeeper/solr/collection1/conf/xslt/example.xsl new file mode 100644 index 0000000..b899270 --- /dev/null +++ b/zookeeper/solr/collection1/conf/xslt/example.xsl @@ -0,0 +1,132 @@ + + + + + + + + + + + + + + + <xsl:value-of select="$title"/> + + + +

    +
    + This has been formatted by the sample "example.xsl" transform - + use your own XSLT to get a nicer page +
    + + + +
    + + + +
    + + + + +
    +
    +
    + + + + + + + + + + + + + + javascript:toggle("");? +
    + + exp + + + + + +
    + + +
    + + + + + + + +
      + +
    • +
      +
    + + +
    + + + + + + + + + + + + + + + + + + + + +
    diff --git a/zookeeper/solr/collection1/conf/xslt/example_atom.xsl b/zookeeper/solr/collection1/conf/xslt/example_atom.xsl new file mode 100644 index 0000000..b6c2315 --- /dev/null +++ b/zookeeper/solr/collection1/conf/xslt/example_atom.xsl @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + Example Solr Atom 1.0 Feed + + This has been formatted by the sample "example_atom.xsl" transform - + use your own XSLT to get a nicer Atom feed. + + + Apache Solr + solr-user@lucene.apache.org + + + + + + tag:localhost,2007:example + + + + + + + + + <xsl:value-of select="str[@name='name']"/> + + tag:localhost,2007: + + + + + + diff --git a/zookeeper/solr/collection1/conf/xslt/example_rss.xsl b/zookeeper/solr/collection1/conf/xslt/example_rss.xsl new file mode 100644 index 0000000..2857f11 --- /dev/null +++ b/zookeeper/solr/collection1/conf/xslt/example_rss.xsl @@ -0,0 +1,66 @@ + + + + + + + + + + + + + Example Solr RSS 2.0 Feed + http://localhost:8983/solr + + This has been formatted by the sample "example_rss.xsl" transform - + use your own XSLT to get a nicer RSS feed. + + en-us + http://localhost:8983/solr + + + + + + + + + + + <xsl:value-of select="str[@name='name']"/> + + http://localhost:8983/solr/select?q=id: + + + + + + + http://localhost:8983/solr/select?q=id: + + + + diff --git a/zookeeper/solr/collection1/conf/xslt/luke.xsl b/zookeeper/solr/collection1/conf/xslt/luke.xsl new file mode 100644 index 0000000..8553f3c --- /dev/null +++ b/zookeeper/solr/collection1/conf/xslt/luke.xsl @@ -0,0 +1,337 @@ + + + + + + + + + Solr Luke Request Handler Response + + + + + + + + + <xsl:value-of select="$title"/> + + + + + +

    + +

    +
    + +
    + +

    Index Statistics

    + +
    + +

    Field Statistics

    + + + +

    Document statistics

    + + + + +
    + + + + + +
    + +
    + + +
    + +
    + +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + +
    +

    + +

    + +
    + +
    +
    +
    + + +
    + + 50 + 800 + 160 + blue + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    + background-color: ; width: px; height: px; +
    +
    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      + +
    • + +
    • +
      +
    + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + + + + + + + + - + + - + + - + + - + + - + + - + + - + + - + + - + + - + + - + + - + + - + + + + + + + + + + + + + + + + + +
    diff --git a/zookeeper/solr/collection1/conf/xslt/updateXml.xsl b/zookeeper/solr/collection1/conf/xslt/updateXml.xsl new file mode 100644 index 0000000..daf1344 --- /dev/null +++ b/zookeeper/solr/collection1/conf/xslt/updateXml.xsl @@ -0,0 +1,70 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/zookeeper/solr/collection1/core.properties b/zookeeper/solr/collection1/core.properties new file mode 100644 index 0000000..bc0cf7d --- /dev/null +++ b/zookeeper/solr/collection1/core.properties @@ -0,0 +1 @@ +name=collection1 \ No newline at end of file diff --git a/zookeeper/solr/solr.xml b/zookeeper/solr/solr.xml new file mode 100644 index 0000000..7ae7244 --- /dev/null +++ b/zookeeper/solr/solr.xml @@ -0,0 +1,45 @@ + + + + + + + + + ${host:} + ${jetty.port:8983} + ${hostContext:solr} + ${zkClientTimeout:15000} + ${genericCoreNodeNames:true} + + + + ${socketTimeout:0} + ${connTimeout:0} + + + diff --git a/zookeeper/solr/zoo.cfg b/zookeeper/solr/zoo.cfg new file mode 100644 index 0000000..aea4518 --- /dev/null +++ b/zookeeper/solr/zoo.cfg @@ -0,0 +1,17 @@ +# The number of milliseconds of each tick +tickTime=2000 +# The number of ticks that the initial +# synchronization phase can take +initLimit=10 +# The number of ticks that can pass between +# sending a request and getting an acknowledgement +syncLimit=5 + +# the directory where the snapshot is stored. 
+# dataDir=/opt/zookeeper/data +# NOTE: Solr defaults the dataDir to /zoo_data + +# the port at which the clients will connect +# clientPort=2181 +# NOTE: Solr sets this based on zkRun / zkHost params + diff --git a/zookeeper/start.jar b/zookeeper/start.jar new file mode 100644 index 0000000..3e47369 Binary files /dev/null and b/zookeeper/start.jar differ diff --git a/zookeeper/webapps/solr-4.4.0.war b/zookeeper/webapps/solr-4.4.0.war new file mode 120000 index 0000000..e25a915 --- /dev/null +++ b/zookeeper/webapps/solr-4.4.0.war @@ -0,0 +1 @@ +../dist/solr-4.4.0.war \ No newline at end of file diff --git a/zookeeper/zookeeper.sh b/zookeeper/zookeeper.sh new file mode 120000 index 0000000..8f61e24 --- /dev/null +++ b/zookeeper/zookeeper.sh @@ -0,0 +1 @@ +../scripts/zookeeper.sh \ No newline at end of file diff --git a/zookeeper/zookeeper.sh.pid b/zookeeper/zookeeper.sh.pid new file mode 100644 index 0000000..3ebb04f --- /dev/null +++ b/zookeeper/zookeeper.sh.pid @@ -0,0 +1 @@ +18024