2 package org.z3950.zing.cql;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.Set;
/**
 * Compiles CQL strings into parse trees of CQLNode subtypes.
 *
 * @see <A href="http://zing.z3950.org/cql/index.html"
 *      >http://zing.z3950.org/cql/index.html</A>
 */
22 public class CQLParser {
23 private CQLLexer lexer;
24 private PositionAwareReader par; //active reader with position
25 private final int compat; // When false, implement CQL 1.2
26 private final Set<String> customRelations = new HashSet<String>();
28 public static final int V1POINT1 = 12368;
29 public static final int V1POINT2 = 12369;
30 public static final int V1POINT1SORT = 12370;
31 public final boolean allowKeywordTerms;
33 static private boolean DEBUG = false;
34 static private boolean LEXDEBUG = false;
/**
 * Creates a parser for the dialect of CQL specified by the
 * {@code compat} argument:
 * <ul>
 *  <li>V1POINT1 - CQL version 1.1</li>
 *  <li>V1POINT2 - CQL version 1.2</li>
 *  <li>V1POINT1SORT - CQL version 1.1 but including
 *      {@code sortby} as specified for CQL 1.2.</li>
 * </ul>
 * Keywords are allowed as unquoted terms.
 *
 * @param compat CQL version compatibility
 */
public CQLParser(int compat) {
    // compat is a final field, so every constructor has to assign it.
    this.compat = compat;
    this.allowKeywordTerms = true;
}
/**
 * Official CQL grammar allows registered keywords like 'and/or/not/sortby/prox'
 * to be used unquoted in terms. This constructor creates a parser that can
 * prohibit this behavior, at the cost of compatibility.
 *
 * @param compat CQL version compatibility
 * @param allowKeywordTerms when false registered keywords are disallowed in unquoted terms
 */
public CQLParser(int compat, boolean allowKeywordTerms) {
    // compat is a final field, so every constructor has to assign it.
    this.compat = compat;
    this.allowKeywordTerms = allowKeywordTerms;
}
/**
 * Creates a parser that implements CQL 1.2 and allows keywords as
 * unquoted terms.
 */
// NOTE(review): the constructor header was absent from the listing; a public
// no-argument constructor is assumed — confirm the intended visibility.
public CQLParser() {
    this.compat = V1POINT2;
    this.allowKeywordTerms = true;
}
74 private static void debug(String str) {
76 System.err.println("PARSEDEBUG: " + str);
80 * Registers custom relation in this parser. Note that when a custom relation
81 * is registered the parser is no longer strictly compliant with the chosen spec.
83 * @return true if custom relation has not been registered already
85 public boolean registerCustomRelation(String relation) {
86 return customRelations.add(relation);
90 * Unregisters previously registered custom relation in this instance of the parser.
92 * @return true is relation has been previously registered
94 public boolean unregisterCustomRelation(String relation) {
95 return customRelations.remove(relation);
99 * Compiles a CQL query.
101 * The resulting parse tree may be further processed by hand (see
102 * the individual node-types' documentation for details on the
103 * data structure) or, more often, simply rendered out in the
104 * desired form using one of the back-ends. <TT>toCQL()</TT>
105 * returns a decompiled CQL query equivalent to the one that was
106 * compiled in the first place; <TT>toXCQL()</TT> returns an
107 * XML snippet representing the query; and <TT>toPQF()</TT>
108 * returns the query rendered in Index Data's Prefix Query
111 * @param cql The query
112 * @return A CQLNode object which is the root of a parse
113 * tree representing the query. */
114 public CQLNode parse(String cql) throws CQLParseException, IOException {
115 return parse(new StringReader(cql));
119 * Compiles a CQL query.
121 * The resulting parse tree may be further processed by hand (see
122 * the individual node-types' documentation for details on the
123 * data structure) or, more often, simply rendered out in the
124 * desired form using one of the back-ends. <TT>toCQL()</TT>
125 * returns a decompiled CQL query equivalent to the one that was
126 * compiled in the first place; <TT>toXCQL()</TT> returns an
127 * XML snippet representing the query; and <TT>toPQF()</TT>
128 * returns the query rendered in Index Data's Prefix Query
131 * @param cql The query
132 * @return A CQLNode object which is the root of a parse
133 * tree representing the query. */
134 public CQLNode parse(Reader cql)
135 throws CQLParseException, IOException {
136 par = new PositionAwareReader(cql);
137 lexer = new CQLLexer(par, LEXDEBUG);
140 debug("about to parseQuery()");
141 CQLNode root = parseTopLevelPrefixes("cql.serverChoice",
142 new CQLRelation(compat == V1POINT2 ? "=" : "scr"));
143 if (lexer.ttype != CQLLexer.TT_EOF)
144 throw new CQLParseException("junk after end: " + lexer.render(),
150 private CQLNode parseTopLevelPrefixes(String index, CQLRelation relation)
151 throws CQLParseException, IOException {
152 debug("top-level prefix mapping");
154 if (lexer.ttype == '>') {
155 return parsePrefix(index, relation, true);
158 CQLNode node = parseQuery(index, relation);
159 if ((compat == V1POINT2 || compat == V1POINT1SORT) &&
160 lexer.ttype == CQLLexer.TT_SORTBY) {
164 CQLSortNode sortnode = new CQLSortNode(node);
165 while (lexer.ttype != CQLLexer.TT_EOF) {
166 String sortindex = matchSymbol("sort index");
167 ModifierSet ms = gatherModifiers(sortindex);
168 sortnode.addSortIndex(ms);
171 if (sortnode.keys.size() == 0) {
172 throw new CQLParseException("no sort keys", par.getPosition());
181 private CQLNode parseQuery(String index, CQLRelation relation)
182 throws CQLParseException, IOException {
183 debug("in parseQuery()");
185 CQLNode term = parseTerm(index, relation);
186 while (lexer.ttype != CQLLexer.TT_EOF &&
187 lexer.ttype != ')' &&
188 lexer.ttype != CQLLexer.TT_SORTBY) {
189 if (lexer.ttype == CQLLexer.TT_AND ||
190 lexer.ttype == CQLLexer.TT_OR ||
191 lexer.ttype == CQLLexer.TT_NOT ||
192 lexer.ttype == CQLLexer.TT_PROX) {
193 int type = lexer.ttype;
194 String val = lexer.sval;
196 ModifierSet ms = gatherModifiers(val);
197 CQLNode term2 = parseTerm(index, relation);
198 term = ((type == CQLLexer.TT_AND) ? new CQLAndNode(term, term2, ms) :
199 (type == CQLLexer.TT_OR) ? new CQLOrNode (term, term2, ms) :
200 (type == CQLLexer.TT_NOT) ? new CQLNotNode(term, term2, ms) :
201 new CQLProxNode(term, term2, ms));
203 throw new CQLParseException("expected boolean, got " +
204 lexer.render(), par.getPosition());
208 debug("no more ops");
212 private ModifierSet gatherModifiers(String base)
213 throws CQLParseException, IOException {
214 debug("in gatherModifiers()");
216 ModifierSet ms = new ModifierSet(base);
217 while (lexer.ttype == '/') {
219 if (lexer.ttype != CQLLexer.TT_WORD)
220 throw new CQLParseException("expected modifier, "
221 + "got " + lexer.render(),
223 String type = lexer.sval.toLowerCase();
225 if (!isSymbolicRelation()) {
226 // It's a simple modifier consisting of type only
227 ms.addModifier(type);
229 // It's a complex modifier of the form type=value
230 String comparision = lexer.render(lexer.ttype, false);
232 String value = matchSymbol("modifier value");
233 ms.addModifier(type, comparision, value);
240 private CQLNode parseTerm(String index, CQLRelation relation)
241 throws CQLParseException, IOException {
242 debug("in parseTerm()");
246 if (lexer.ttype == '(') {
247 debug("parenthesised term");
249 CQLNode expr = parseQuery(index, relation);
252 } else if (lexer.ttype == '>') {
253 return parsePrefix(index, relation, false);
256 debug("non-parenthesised term");
257 word = matchSymbol("index or term");
258 while (lexer.ttype == CQLLexer.TT_WORD && !isRelation()) {
259 word = word + " " + lexer.sval;
260 match(CQLLexer.TT_WORD);
267 String relstr = (lexer.ttype == CQLLexer.TT_WORD ?
268 lexer.sval : lexer.render(lexer.ttype, false));
269 relation = new CQLRelation(relstr);
271 ModifierSet ms = gatherModifiers(relstr);
273 debug("index='" + index + ", " +
274 "relation='" + relation.toCQL() + "'");
277 CQLTermNode node = new CQLTermNode(index, relation, word);
278 debug("made term node " + node.toCQL());
282 private CQLNode parsePrefix(String index, CQLRelation relation,
284 throws CQLParseException, IOException {
285 debug("prefix mapping");
289 String identifier = matchSymbol("prefix-name");
290 if (lexer.ttype == '=') {
293 identifier = matchSymbol("prefix-identifer");
295 CQLNode node = topLevel ?
296 parseTopLevelPrefixes(index, relation) :
297 parseQuery(index, relation);
299 return new CQLPrefixNode(name, identifier, node);
302 private boolean isRelation() {
303 debug("isRelation: checking ttype=" + lexer.ttype +
304 " (" + lexer.render() + ")");
305 if (lexer.ttype == CQLLexer.TT_WORD &&
306 (lexer.sval.indexOf('.') >= 0 ||
307 lexer.sval.equals("any") ||
308 lexer.sval.equals("all") ||
309 lexer.sval.equals("within") ||
310 lexer.sval.equals("encloses") ||
311 (lexer.sval.equals("exact") && compat != V1POINT2) ||
312 (lexer.sval.equals("scr") && compat != V1POINT2) ||
313 (lexer.sval.equals("adj") && compat == V1POINT2) ||
314 customRelations.contains(lexer.sval)))
317 return isSymbolicRelation();
320 private boolean isSymbolicRelation() {
321 debug("isSymbolicRelation: checking ttype=" + lexer.ttype +
322 " (" + lexer.render() + ")");
323 return (lexer.ttype == '<' ||
324 lexer.ttype == '>' ||
325 lexer.ttype == '=' ||
326 lexer.ttype == CQLLexer.TT_LE ||
327 lexer.ttype == CQLLexer.TT_GE ||
328 lexer.ttype == CQLLexer.TT_NE ||
329 lexer.ttype == CQLLexer.TT_EQEQ);
332 private void match(int token)
333 throws CQLParseException, IOException {
334 debug("in match(" + lexer.render(token, true) + ")");
335 if (lexer.ttype != token)
336 throw new CQLParseException("expected " +
337 lexer.render(token, true) +
338 ", " + "got " + lexer.render(),
340 int tmp = lexer.nextToken();
341 debug("match() got token=" + lexer.ttype + ", " +
342 "nval=" + lexer.nval + ", sval='" + lexer.sval + "'" +
343 " (tmp=" + tmp + ")");
346 private String matchSymbol(String expected)
347 throws CQLParseException, IOException {
349 debug("in matchSymbol()");
350 if (lexer.ttype == CQLLexer.TT_WORD ||
351 lexer.ttype == CQLLexer.TT_NUMBER ||
352 lexer.ttype == '"' ||
353 // The following is a complete list of keywords. Because
354 // they're listed here, they can be used unquoted as
355 // indexes, terms, prefix names and prefix identifiers.
356 // ### Instead, we should ask the lexer whether what we
357 // have is a keyword, and let the knowledge reside there.
358 (allowKeywordTerms &&
359 lexer.ttype == CQLLexer.TT_AND ||
360 lexer.ttype == CQLLexer.TT_OR ||
361 lexer.ttype == CQLLexer.TT_NOT ||
362 lexer.ttype == CQLLexer.TT_PROX ||
363 lexer.ttype == CQLLexer.TT_SORTBY)) {
364 String symbol = (lexer.ttype == CQLLexer.TT_NUMBER) ?
365 lexer.render() : lexer.sval;
370 throw new CQLParseException("expected " + expected + ", " +
371 "got " + lexer.render(), par.getPosition());
376 * Simple test-harness for the CQLParser class.
378 * Reads a CQL query either from its command-line argument, if
379 * there is one, or standard input otherwise. So these two
380 * invocations are equivalent:
382 * CQLParser 'au=(Kerninghan or Ritchie) and ti=Unix'
383 * echo au=(Kerninghan or Ritchie) and ti=Unix | CQLParser
385 * The test-harness parses the supplied query and renders is as
386 * XCQL, so that both of the invocations above produce the
391 * <value>and</value>
395 * <value>or</value>
397 * <searchClause>
398 * <index>au</index>
400 * <value>=</value>
402 * <term>Kerninghan</term>
403 * </searchClause>
404 * <searchClause>
405 * <index>au</index>
407 * <value>=</value>
409 * <term>Ritchie</term>
410 * </searchClause>
412 * <searchClause>
413 * <index>ti</index>
415 * <value>=</value>
417 * <term>Unix</term>
418 * </searchClause>
423 * CQL version 1.1 (default version 1.2)
425 * Debug mode: extra output written to stderr.
427 * Causes the output to be written in CQL rather than XCQL - that
428 * is, a query equivalent to that which was input, is output. In
429 * effect, the test harness acts as a query canonicaliser.
431 * The input query, either as XCQL [default] or CQL [if the
432 * <TT>-c</TT> option is supplied].
434 public static void main (String[] args) {
435 char mode = 'x'; // x=XCQL, c=CQL, p=PQF
438 List<String> argv = new ArrayList<String>();
439 for (int i = 0; i < args.length; i++) {
443 int compat = V1POINT2;
444 if (argv.size() > 0 && argv.get(0).equals("-1")) {
449 if (argv.size() > 0 && argv.get(0).equals("-d")) {
454 if (argv.size() > 0 && argv.get(0).equals("-c")) {
457 } else if (argv.size() > 1 && argv.get(0).equals("-p")) {
460 pfile = (String) argv.get(0);
464 if (argv.size() > 1) {
465 System.err.println("Usage: CQLParser [-1] [-d] [-c] " +
466 "[-p <pqf-properties> [<CQL-query>]");
467 System.err.println("If unspecified, query is read from stdin");
472 if (argv.size() == 1) {
473 cql = (String) argv.get(0);
475 byte[] bytes = new byte[10000];
477 // Read in the whole of standard input in one go
478 int nbytes = System.in.read(bytes);
479 } catch (IOException ex) {
480 System.err.println("Can't read query: " + ex.getMessage());
483 cql = new String(bytes);
486 CQLParser parser = new CQLParser(compat);
489 root = parser.parse(cql);
490 } catch (CQLParseException ex) {
491 System.err.println("Syntax error: " + ex.getMessage());
493 } catch (IOException ex) {
494 System.err.println("Can't compile query: " + ex.getMessage());
500 System.out.println(root.toCQL());
501 } else if (mode == 'p') {
502 InputStream f = new FileInputStream(pfile);
504 throw new FileNotFoundException(pfile);
506 Properties config = new Properties();
509 System.out.println(root.toPQF(config));
511 System.out.print(root.toXCQL());
513 } catch (IOException ex) {
514 System.err.println("Can't render query: " + ex.getMessage());
516 } catch (UnknownIndexException ex) {
517 System.err.println("Unknown index: " + ex.getMessage());
519 } catch (UnknownRelationException ex) {
520 System.err.println("Unknown relation: " + ex.getMessage());
522 } catch (UnknownRelationModifierException ex) {
523 System.err.println("Unknown relation modifier: " +
526 } catch (UnknownPositionException ex) {
527 System.err.println("Unknown position: " + ex.getMessage());
529 } catch (PQFTranslationException ex) {
530 // We catch all of this class's subclasses, so --
531 throw new Error("can't get a PQFTranslationException");