2 package org.z3950.zing.cql;
3 import java.io.IOException;
4 import java.util.Properties;
5 import java.io.InputStream;
6 import java.io.FileInputStream;
7 import java.io.FileNotFoundException;
9 import java.io.StringReader;
10 import java.util.ArrayList;
11 import java.util.HashSet;
12 import java.util.List;
/**
 * Compiles CQL strings into parse trees of CQLNode subtypes.
 *
 * @see <A href="http://zing.z3950.org/cql/index.html"
 *      >http://zing.z3950.org/cql/index.html</A>
 */
22 public class CQLParser {
23 private CQLTokenizer lexer;
24 private final int compat; // When false, implement CQL 1.2
25 private final Set<String> customRelations = new HashSet<String>();
27 public static final int V1POINT1 = 12368;
28 public static final int V1POINT2 = 12369;
29 public static final int V1POINT1SORT = 12370;
30 public final boolean allowKeywordTerms;
32 static private boolean DEBUG = false;
33 static private boolean LEXDEBUG = false;
/**
 * The new parser implements a dialect of CQL specified by the
 * <tt>compat</tt> argument:
 * <ul>
 *  <li>V1POINT1 - CQL version 1.1
 *  </li>
 *  <li>V1POINT2 - CQL version 1.2
 *  </li>
 *  <li>V1POINT1SORT - CQL version 1.1 but including
 *      <tt>sortby</tt> as specified for CQL 1.2.
 *  </li>
 * </ul>
 */
public CQLParser(int compat) {
    // Same as the two-argument constructor with unquoted keyword terms allowed.
    this(compat, true);
}
/**
 * The official CQL grammar allows registered keywords such as 'and/or/not/sortby/prox'
 * to be used unquoted in terms. This constructor creates a parser that can
 * prohibit this behavior, at the cost of compatibility.
 * @param compat CQL version compatibility
 * @param allowKeywordTerms when false registered keywords are disallowed in unquoted terms
 */
public CQLParser(int compat, boolean allowKeywordTerms) {
    this.allowKeywordTerms = allowKeywordTerms;
    // NOTE(review): this assignment is absent from the reviewed listing; it is
    // restored because the final field 'compat' must be set by every constructor.
    this.compat = compat;
}
/**
 * The new parser implements CQL 1.2.
 */
// NOTE(review): the signature line is absent from the reviewed listing; a public
// no-argument constructor is assumed from the Javadoc and the body - confirm.
public CQLParser() {
    // CQL 1.2, with unquoted keyword terms allowed.
    this(V1POINT2, true);
}
73 private static void debug(String str) {
75 System.err.println("PARSEDEBUG: " + str);
/**
 * Registers a custom relation in this parser. Note that once a custom relation
 * is registered, the parser is no longer strictly compliant with the chosen spec.
 * @param relation name of the relation to accept
 * @return true if the custom relation has not been registered already
 */
84 public boolean registerCustomRelation(String relation) {
85 return customRelations.add(relation);
/**
 * Unregisters a previously registered custom relation in this instance of the parser.
 * @param relation name of the relation to stop accepting
 * @return true if the relation has been previously registered
 */
93 public boolean unregisterCustomRelation(String relation) {
94 return customRelations.remove(relation);
/**
 * Compiles a CQL query.
 * <P>
 * The resulting parse tree may be further processed by hand (see
 * the individual node-types' documentation for details on the
 * data structure) or, more often, simply rendered out in the
 * desired form using one of the back-ends.  <TT>toCQL()</TT>
 * returns a decompiled CQL query equivalent to the one that was
 * compiled in the first place; <TT>toXCQL()</TT> returns an
 * XML snippet representing the query; and <TT>toPQF()</TT>
 * returns the query rendered in Index Data's Prefix Query
 * Format.
 *
 * @param cql   The query
 * @return      A CQLNode object which is the root of a parse
 *              tree representing the query.
 */
113 public CQLNode parse(String cql)
114 throws CQLParseException, IOException {
115 lexer = new CQLLexer(cql, LEXDEBUG);
118 debug("about to parseQuery()");
119 CQLNode root = parseTopLevelPrefixes("cql.serverChoice",
120 new CQLRelation(compat == V1POINT2 ? "=" : "scr"));
121 if (lexer.what() != CQLTokenizer.TT_EOF)
122 throw new CQLParseException("junk after end: " + lexer.render(),
128 private CQLNode parseTopLevelPrefixes(String index, CQLRelation relation)
129 throws CQLParseException, IOException {
130 debug("top-level prefix mapping");
132 if (lexer.what() == '>') {
133 return parsePrefix(index, relation, true);
136 CQLNode node = parseQuery(index, relation);
137 if ((compat == V1POINT2 || compat == V1POINT1SORT) &&
138 lexer.what() == CQLTokenizer.TT_SORTBY) {
142 CQLSortNode sortnode = new CQLSortNode(node);
143 while (lexer.what() != CQLTokenizer.TT_EOF) {
144 String sortindex = matchSymbol("sort index");
145 ModifierSet ms = gatherModifiers(sortindex);
146 sortnode.addSortIndex(ms);
149 if (sortnode.keys.size() == 0) {
150 throw new CQLParseException("no sort keys", lexer.pos());
159 private CQLNode parseQuery(String index, CQLRelation relation)
160 throws CQLParseException, IOException {
161 debug("in parseQuery()");
163 CQLNode term = parseTerm(index, relation);
164 while (lexer.what() != CQLTokenizer.TT_EOF &&
165 lexer.what() != ')' &&
166 lexer.what() != CQLTokenizer.TT_SORTBY) {
167 if (lexer.what() == CQLTokenizer.TT_AND ||
168 lexer.what() == CQLTokenizer.TT_OR ||
169 lexer.what() == CQLTokenizer.TT_NOT ||
170 lexer.what() == CQLTokenizer.TT_PROX) {
171 int type = lexer.what();
172 String val = lexer.value();
174 ModifierSet ms = gatherModifiers(val);
175 CQLNode term2 = parseTerm(index, relation);
176 term = ((type == CQLTokenizer.TT_AND) ? new CQLAndNode(term, term2, ms) :
177 (type == CQLTokenizer.TT_OR) ? new CQLOrNode (term, term2, ms) :
178 (type == CQLTokenizer.TT_NOT) ? new CQLNotNode(term, term2, ms) :
179 new CQLProxNode(term, term2, ms));
181 throw new CQLParseException("expected boolean, got " +
182 lexer.render(), lexer.pos());
186 debug("no more ops");
190 private ModifierSet gatherModifiers(String base)
191 throws CQLParseException, IOException {
192 debug("in gatherModifiers()");
194 ModifierSet ms = new ModifierSet(base);
195 while (lexer.what() == '/') {
197 if (lexer.what() != CQLTokenizer.TT_WORD)
198 throw new CQLParseException("expected modifier, "
199 + "got " + lexer.render(),
201 String type = lexer.value().toLowerCase();
203 if (!isSymbolicRelation()) {
204 // It's a simple modifier consisting of type only
205 ms.addModifier(type);
207 // It's a complex modifier of the form type=value
208 String comparision = lexer.render(lexer.what(), false);
210 String value = matchSymbol("modifier value");
211 ms.addModifier(type, comparision, value);
218 private CQLNode parseTerm(String index, CQLRelation relation)
219 throws CQLParseException, IOException {
220 debug("in parseTerm()");
224 if (lexer.what() == '(') {
225 debug("parenthesised term");
227 CQLNode expr = parseQuery(index, relation);
230 } else if (lexer.what() == '>') {
231 return parsePrefix(index, relation, false);
234 debug("non-parenthesised term");
235 word = matchSymbol("index or term");
236 while (lexer.what() == CQLTokenizer.TT_WORD && !isRelation()) {
237 word = word + " " + lexer.value();
238 match(CQLTokenizer.TT_WORD);
245 String relstr = (lexer.what() == CQLTokenizer.TT_WORD ?
246 lexer.value() : lexer.render(lexer.what(), false));
247 relation = new CQLRelation(relstr);
249 ModifierSet ms = gatherModifiers(relstr);
251 debug("index='" + index + ", " +
252 "relation='" + relation.toCQL() + "'");
255 CQLTermNode node = new CQLTermNode(index, relation, word);
256 debug("made term node " + node.toCQL());
260 private CQLNode parsePrefix(String index, CQLRelation relation,
262 throws CQLParseException, IOException {
263 debug("prefix mapping");
267 String identifier = matchSymbol("prefix-name");
268 if (lexer.what() == '=') {
271 identifier = matchSymbol("prefix-identifer");
273 CQLNode node = topLevel ?
274 parseTopLevelPrefixes(index, relation) :
275 parseQuery(index, relation);
277 return new CQLPrefixNode(name, identifier, node);
280 private boolean isRelation() {
281 debug("isRelation: checking what()=" + lexer.what() +
282 " (" + lexer.render() + ")");
283 if (lexer.what() == CQLTokenizer.TT_WORD &&
284 (lexer.value().indexOf('.') >= 0 ||
285 lexer.value().equals("any") ||
286 lexer.value().equals("all") ||
287 lexer.value().equals("within") ||
288 lexer.value().equals("encloses") ||
289 (lexer.value().equals("exact") && compat != V1POINT2) ||
290 (lexer.value().equals("scr") && compat != V1POINT2) ||
291 (lexer.value().equals("adj") && compat == V1POINT2) ||
292 customRelations.contains(lexer.value())))
295 return isSymbolicRelation();
298 private boolean isSymbolicRelation() {
299 debug("isSymbolicRelation: checking what()=" + lexer.what() +
300 " (" + lexer.render() + ")");
301 return (lexer.what() == '<' ||
302 lexer.what() == '>' ||
303 lexer.what() == '=' ||
304 lexer.what() == CQLTokenizer.TT_LE ||
305 lexer.what() == CQLTokenizer.TT_GE ||
306 lexer.what() == CQLTokenizer.TT_NE ||
307 lexer.what() == CQLTokenizer.TT_EQEQ);
310 private void match(int token)
311 throws CQLParseException, IOException {
312 debug("in match(" + lexer.render(token, true) + ")");
313 if (lexer.what() != token)
314 throw new CQLParseException("expected " +
315 lexer.render(token, true) +
316 ", " + "got " + lexer.render(),
319 debug("match() got token=" + lexer.what() + ", value()='" + lexer.value() + "'");
322 private String matchSymbol(String expected)
323 throws CQLParseException, IOException {
325 debug("in matchSymbol()");
326 if (lexer.what() == CQLTokenizer.TT_WORD ||
327 lexer.what() == '"' ||
328 // The following is a complete list of keywords. Because
329 // they're listed here, they can be used unquoted as
330 // indexes, terms, prefix names and prefix identifiers.
331 // ### Instead, we should ask the lexer whether what we
332 // have is a keyword, and let the knowledge reside there.
333 (allowKeywordTerms &&
334 lexer.what() == CQLTokenizer.TT_AND ||
335 lexer.what() == CQLTokenizer.TT_OR ||
336 lexer.what() == CQLTokenizer.TT_NOT ||
337 lexer.what() == CQLTokenizer.TT_PROX ||
338 lexer.what() == CQLTokenizer.TT_SORTBY)) {
339 String symbol = lexer.value();
344 throw new CQLParseException("expected " + expected + ", " +
345 "got " + lexer.render(), lexer.pos());
/**
 * Simple test-harness for the CQLParser class.
 * <P>
 * Reads a CQL query either from its command-line argument, if
 * there is one, or standard input otherwise.  So these two
 * invocations are equivalent:
 * <PRE>
 *  CQLParser 'au=(Kerninghan or Ritchie) and ti=Unix'
 *  echo au=(Kerninghan or Ritchie) and ti=Unix | CQLParser
 * </PRE>
 * The test-harness parses the supplied query and renders it as
 * XCQL, so that both of the invocations above produce the
 * following output:
 * <pre>{@code
 * <triple>
 *   <boolean>
 *     <value>and</value>
 *   </boolean>
 *   <triple>
 *     <boolean>
 *       <value>or</value>
 *     </boolean>
 *     <searchClause>
 *       <index>au</index>
 *       <relation>
 *         <value>=</value>
 *       </relation>
 *       <term>Kerninghan</term>
 *     </searchClause>
 *     <searchClause>
 *       <index>au</index>
 *       <relation>
 *         <value>=</value>
 *       </relation>
 *       <term>Ritchie</term>
 *     </searchClause>
 *   </triple>
 *   <searchClause>
 *     <index>ti</index>
 *     <relation>
 *       <value>=</value>
 *     </relation>
 *     <term>Unix</term>
 *   </searchClause>
 * </triple>
 * }</pre>
 * Command-line options:
 * <DL>
 *  <DT><TT>-1</TT></DT>
 *  <DD>CQL version 1.1 (default version 1.2)</DD>
 *  <DT><TT>-d</TT></DT>
 *  <DD>Debug mode: extra output written to stderr.</DD>
 *  <DT><TT>-c</TT></DT>
 *  <DD>Causes the output to be written in CQL rather than XCQL - that
 *      is, a query equivalent to that which was input, is output.  In
 *      effect, the test harness acts as a query canonicaliser.</DD>
 *  <DT><TT>-p &lt;pqf-properties&gt;</TT></DT>
 *  <DD>Causes the output to be written in PQF; the argument names the
 *      properties file supplied to the PQF back-end.</DD>
 * </DL>
 * Output: the input query, either as XCQL [default] or CQL [if the
 * <TT>-c</TT> option is supplied].
 */
408 public static void main (String[] args) {
409 char mode = 'x'; // x=XCQL, c=CQL, p=PQF
412 List<String> argv = new ArrayList<String>();
413 for (int i = 0; i < args.length; i++) {
417 int compat = V1POINT2;
418 if (argv.size() > 0 && argv.get(0).equals("-1")) {
423 if (argv.size() > 0 && argv.get(0).equals("-d")) {
428 if (argv.size() > 0 && argv.get(0).equals("-c")) {
431 } else if (argv.size() > 1 && argv.get(0).equals("-p")) {
434 pfile = (String) argv.get(0);
438 if (argv.size() > 1) {
439 System.err.println("Usage: CQLParser [-1] [-d] [-c] " +
440 "[-p <pqf-properties> [<CQL-query>]");
441 System.err.println("If unspecified, query is read from stdin");
446 if (argv.size() == 1) {
447 cql = (String) argv.get(0);
449 byte[] bytes = new byte[10000];
451 // Read in the whole of standard input in one go
452 int nbytes = System.in.read(bytes);
453 } catch (IOException ex) {
454 System.err.println("Can't read query: " + ex.getMessage());
457 cql = new String(bytes);
460 CQLParser parser = new CQLParser(compat);
463 root = parser.parse(cql);
464 } catch (CQLParseException ex) {
465 System.err.println("Syntax error: " + ex.getMessage());
467 } catch (IOException ex) {
468 System.err.println("Can't compile query: " + ex.getMessage());
474 System.out.println(root.toCQL());
475 } else if (mode == 'p') {
476 InputStream f = new FileInputStream(pfile);
478 throw new FileNotFoundException(pfile);
480 Properties config = new Properties();
483 System.out.println(root.toPQF(config));
485 System.out.print(root.toXCQL());
487 } catch (IOException ex) {
488 System.err.println("Can't render query: " + ex.getMessage());
490 } catch (UnknownIndexException ex) {
491 System.err.println("Unknown index: " + ex.getMessage());
493 } catch (UnknownRelationException ex) {
494 System.err.println("Unknown relation: " + ex.getMessage());
496 } catch (UnknownRelationModifierException ex) {
497 System.err.println("Unknown relation modifier: " +
500 } catch (UnknownPositionException ex) {
501 System.err.println("Unknown position: " + ex.getMessage());
503 } catch (PQFTranslationException ex) {
504 // We catch all of this class's subclasses, so --
505 throw new Error("can't get a PQFTranslationException");