1 // $Id: CQLParser.java,v 1.39 2007-08-06 15:54:48 mike Exp $
3 package org.z3950.zing.cql;
4 import java.io.IOException;
5 import java.util.Properties;
6 import java.io.InputStream;
7 import java.io.FileInputStream;
8 import java.io.FileNotFoundException;
10 import java.io.StringReader;
11 import java.util.ArrayList;
12 import java.util.List;
16 * Compiles CQL strings into parse trees of CQLNode subtypes.
18 * @version $Id: CQLParser.java,v 1.39 2007-08-06 15:54:48 mike Exp $
19 * @see <A href="http://zing.z3950.org/cql/index.html"
20 * >http://zing.z3950.org/cql/index.html</A>
22 public class CQLParser {
// Lexer supplying the token stream for the parse in progress; parse(Reader)
// assigns a newly constructed CQLLexer to it on every call.
23 private CQLLexer lexer;
24 private int compat; // CQL dialect selector: V1POINT1, V1POINT2 or V1POINT1SORT
// Dialect identifiers that the compat field is compared against.
25 public static final int V1POINT1 = 12368;
26 public static final int V1POINT2 = 12369;
27 public static final int V1POINT1SORT = 12370;
// Debug switches. LEXDEBUG is passed to the CQLLexer constructor.
// NOTE(review): DEBUG presumably gates debug() output -- the guard is not
// visible in this excerpt; confirm.
29 static private boolean DEBUG = false;
30 static private boolean LEXDEBUG = false;
33 * The new parser implements a dialect of CQL specified by the
34 * <tt>compat</tt> argument:
36 * <li>V1POINT1 - CQL version 1.1
38 * <li>V1POINT2 - CQL version 1.2
40 * <li>V1POINT1SORT - CQL version 1.1 but including
41 * <tt>sortby</tt> as specified for CQL 1.2.
// Constructor taking the CQL dialect to implement (see V1POINT1, V1POINT2,
// V1POINT1SORT).
// NOTE(review): the body of this constructor is not visible in this excerpt;
// presumably it stores the argument in this.compat -- confirm.
45 public CQLParser(int compat) {
50 * The new parser implements CQL 1.2
// NOTE(review): this assignment appears to belong to a no-argument
// constructor whose header is not visible here; it selects CQL 1.2.
53 this.compat = V1POINT2;
// Writes a diagnostic line, prefixed with "PARSEDEBUG: ", to standard error.
// NOTE(review): the println is presumably guarded by the DEBUG flag; the
// guard line is not visible in this excerpt.
56 private static void debug(String str) {
58 System.err.println("PARSEDEBUG: " + str);
62 * Compiles a CQL query.
64 * The resulting parse tree may be further processed by hand (see
65 * the individual node-types' documentation for details on the
66 * data structure) or, more often, simply rendered out in the
67 * desired form using one of the back-ends. <TT>toCQL()</TT>
68 * returns a decompiled CQL query equivalent to the one that was
69 * compiled in the first place; <TT>toXCQL()</TT> returns an
70 * XML snippet representing the query; and <TT>toPQF()</TT>
71 * returns the query rendered in Index Data's Prefix Query
74 * @param cql The query
75 * @return A CQLNode object which is the root of a parse
76 * tree representing the query. */
// Convenience overload: wraps the query string in a StringReader and
// delegates to parse(Reader), returning whatever that call returns.
77 public CQLNode parse(String cql) throws CQLParseException, IOException {
78 return parse(new StringReader(cql));
82 * Compiles a CQL query.
84 * The resulting parse tree may be further processed by hand (see
85 * the individual node-types' documentation for details on the
86 * data structure) or, more often, simply rendered out in the
87 * desired form using one of the back-ends. <TT>toCQL()</TT>
88 * returns a decompiled CQL query equivalent to the one that was
89 * compiled in the first place; <TT>toXCQL()</TT> returns an
90 * XML snippet representing the query; and <TT>toPQF()</TT>
91 * returns the query rendered in Index Data's Prefix Query
94 * @param cql The query
95 * @return A CQLNode object which is the root of a parse
96 * tree representing the query. */
// Parses CQL text read from the given Reader. A new CQLLexer is built over
// the reader, and parsing starts in parseTopLevelPrefixes() with the default
// index "cql.serverChoice" and a default relation of "=" when compat is
// V1POINT2, or "scr" for any other compat value.
// Throws CQLParseException if input remains after a complete query.
97 public CQLNode parse(Reader cql)
98 throws CQLParseException, IOException {
99 lexer = new CQLLexer(cql, LEXDEBUG);
// NOTE(review): the first token is presumably fetched here before parsing
// begins -- that statement is not visible in this excerpt.
102 debug("about to parseQuery()");
103 CQLNode root = parseTopLevelPrefixes("cql.serverChoice",
104 new CQLRelation(compat == V1POINT2 ? "=" : "scr"));
// Any token other than end-of-input at this point is trailing junk.
105 if (lexer.ttype != CQLLexer.TT_EOF)
106 throw new CQLParseException("junk after end: " + lexer.render());
// Parses the outermost level of a query. A leading '>' is handed to
// parsePrefix() with topLevel=true; otherwise a query is parsed with
// parseQuery() and, when compat is V1POINT2 or V1POINT1SORT and the current
// token is TT_SORTBY, a sort specification is parsed after it.
// index and relation are the defaults passed down to the sub-parsers.
111 private CQLNode parseTopLevelPrefixes(String index, CQLRelation relation)
112 throws CQLParseException, IOException {
113 debug("top-level prefix mapping");
// A '>' token introduces a prefix mapping.
115 if (lexer.ttype == '>') {
116 return parsePrefix(index, relation, true);
119 CQLNode node = parseQuery(index, relation);
// Sort clauses are only recognised in the dialects that define sortby.
120 if ((compat == V1POINT2 || compat == V1POINT1SORT) &&
121 lexer.ttype == CQLLexer.TT_SORTBY) {
// Wrap the parsed query in a sort node and collect sort keys until end of
// input; each key is a sort index plus whatever gatherModifiers() returns.
125 CQLSortNode sortnode = new CQLSortNode(node);
126 while (lexer.ttype != CQLLexer.TT_EOF) {
127 String sortindex = matchSymbol("sort index");
128 ModifierSet ms = gatherModifiers(sortindex);
129 sortnode.addSortIndex(ms);
// A sort clause that yielded no keys is rejected.
132 if (sortnode.keys.size() == 0) {
133 throw new CQLParseException("no sort keys");
// Parses a term followed by zero or more "boolean-operator term" pairs.
// The tree is built incrementally: each operator node takes the tree built
// so far as its first operand and the newly parsed term as its second.
// The loop ends at end of input, at ')' or at a TT_SORTBY token.
// Throws CQLParseException when a token that is not AND/OR/NOT/PROX appears
// where an operator is required.
142 private CQLNode parseQuery(String index, CQLRelation relation)
143 throws CQLParseException, IOException {
144 debug("in parseQuery()");
146 CQLNode term = parseTerm(index, relation);
147 while (lexer.ttype != CQLLexer.TT_EOF &&
148 lexer.ttype != ')' &&
149 lexer.ttype != CQLLexer.TT_SORTBY) {
150 if (lexer.ttype == CQLLexer.TT_AND ||
151 lexer.ttype == CQLLexer.TT_OR ||
152 lexer.ttype == CQLLexer.TT_NOT ||
153 lexer.ttype == CQLLexer.TT_PROX) {
// Remember the operator's token type and text before parsing further.
// NOTE(review): the operator token is presumably consumed between here and
// gatherModifiers(); that statement is not visible in this excerpt.
154 int type = lexer.ttype;
155 String val = lexer.sval;
// The operator text is the base of the ModifierSet that holds its modifiers.
157 ModifierSet ms = gatherModifiers(val);
158 CQLNode term2 = parseTerm(index, relation);
// Pick the node class matching the operator; PROX is the fall-through case.
159 term = ((type == CQLLexer.TT_AND) ? new CQLAndNode(term, term2, ms) :
160 (type == CQLLexer.TT_OR) ? new CQLOrNode (term, term2, ms) :
161 (type == CQLLexer.TT_NOT) ? new CQLNotNode(term, term2, ms) :
162 new CQLProxNode(term, term2, ms));
164 throw new CQLParseException("expected boolean, got " +
169 debug("no more ops");
// Collects modifiers introduced by '/' tokens into a new ModifierSet whose
// base is the given string. A modifier is either a bare type word or a type
// followed by a symbolic relation and a value.
// Throws CQLParseException when the token expected to be a modifier type is
// not a word.
// NOTE(review): several token-consuming statements and the return are not
// visible in this excerpt.
173 private ModifierSet gatherModifiers(String base)
174 throws CQLParseException, IOException {
175 debug("in gatherModifiers()");
177 ModifierSet ms = new ModifierSet(base);
178 while (lexer.ttype == '/') {
180 if (lexer.ttype != CQLLexer.TT_WORD)
181 throw new CQLParseException("expected modifier, "
182 + "got " + lexer.render());
// Modifier types are recorded in lower case.
183 String type = lexer.sval.toLowerCase();
185 if (!isSymbolicRelation()) {
186 // It's a simple modifier consisting of type only
187 ms.addModifier(type);
189 // It's a complex modifier of the form type=value
// The comparison operator is stored in its rendered textual form.
190 String comparision = lexer.render(lexer.ttype, false);
192 String value = matchSymbol("modifier value");
193 ms.addModifier(type, comparision, value);
// Parses one term: a parenthesised sub-query, a '>' prefix mapping (handled
// by parsePrefix() with topLevel=false), or a search clause that ends up as
// a CQLTermNode built from the current index, relation and term text.
// index and relation are the defaults in force; relation is replaced when an
// explicit relation is read.
// NOTE(review): the loop structure and several statements of this method are
// not visible in this excerpt, so the comments below cover only what is shown.
200 private CQLNode parseTerm(String index, CQLRelation relation)
201 throws CQLParseException, IOException {
202 debug("in parseTerm()");
206 if (lexer.ttype == '(') {
207 debug("parenthesised term");
209 CQLNode expr = parseQuery(index, relation);
212 } else if (lexer.ttype == '>') {
213 return parsePrefix(index, relation, false);
216 debug("non-parenthesised term");
217 word = matchSymbol("index or term");
// Following word tokens that are not relations are appended to the term,
// separated by single spaces.
218 while (lexer.ttype == CQLLexer.TT_WORD && !isRelation()) {
219 word = word + " " + lexer.sval;
220 match(CQLLexer.TT_WORD);
// The relation text is the word itself for a word token, otherwise the
// rendered form of the symbolic token.
227 String relstr = (lexer.ttype == CQLLexer.TT_WORD ?
228 lexer.sval : lexer.render(lexer.ttype, false));
229 relation = new CQLRelation(relstr);
// NOTE(review): what happens to ms after this call is not visible here.
231 ModifierSet ms = gatherModifiers(relstr);
// NOTE(review): the debug text opens a quote before the index value but
// never closes it.
233 debug("index='" + index + ", " +
234 "relation='" + relation.toCQL() + "'");
237 CQLTermNode node = new CQLTermNode(index, relation, word);
238 debug("made term node " + node.toCQL());
// Parses a prefix mapping and the query it applies to, returning a
// CQLPrefixNode that wraps the parsed sub-query. When topLevel is true the
// sub-query is parsed with parseTopLevelPrefixes(), otherwise with
// parseQuery().
// NOTE(review): part of the signature and the declaration of `name` are not
// visible in this excerpt.
242 private CQLNode parsePrefix(String index, CQLRelation relation,
244 throws CQLParseException, IOException {
245 debug("prefix mapping");
249 String identifier = matchSymbol("prefix-name");
// An '=' after the first symbol means a second symbol follows, and that
// second symbol becomes the identifier.
250 if (lexer.ttype == '=') {
253 identifier = matchSymbol("prefix-identifer");
255 CQLNode node = topLevel ?
256 parseTopLevelPrefixes(index, relation) :
257 parseQuery(index, relation);
259 return new CQLPrefixNode(name, identifier, node);
// Tests whether the current token can serve as a relation. Word tokens are
// checked against the named relations: any word containing '.', or one of
// "any", "all", "within", "encloses", "exact", plus "scr" when compat is not
// V1POINT2 and "adj" when compat is V1POINT2. Every other case is decided by
// isSymbolicRelation().
// NOTE(review): the statement executed when the word test succeeds is not
// visible in this excerpt (presumably it returns true).
262 private boolean isRelation() {
263 debug("isRelation: checking ttype=" + lexer.ttype +
264 " (" + lexer.render() + ")");
265 if (lexer.ttype == CQLLexer.TT_WORD &&
266 (lexer.sval.indexOf('.') >= 0 ||
267 lexer.sval.equals("any") ||
268 lexer.sval.equals("all") ||
269 lexer.sval.equals("within") ||
270 lexer.sval.equals("encloses") ||
271 lexer.sval.equals("exact") ||
272 (lexer.sval.equals("scr") && compat != V1POINT2) ||
273 (lexer.sval.equals("adj") && compat == V1POINT2)))
276 return isSymbolicRelation();
// Returns true when the current token is one of the symbolic relation
// tokens: '<', '>', '=', TT_LE, TT_GE, TT_NE or TT_EQEQ. The token is only
// inspected (and logged via debug()).
279 private boolean isSymbolicRelation() {
280 debug("isSymbolicRelation: checking ttype=" + lexer.ttype +
281 " (" + lexer.render() + ")");
282 return (lexer.ttype == '<' ||
283 lexer.ttype == '>' ||
284 lexer.ttype == '=' ||
285 lexer.ttype == CQLLexer.TT_LE ||
286 lexer.ttype == CQLLexer.TT_GE ||
287 lexer.ttype == CQLLexer.TT_NE ||
288 lexer.ttype == CQLLexer.TT_EQEQ);
// Requires the current token to have the given type, then advances the
// lexer to the next token.
// Throws CQLParseException, naming both the expected and the actual token,
// when the types differ.
291 private void match(int token)
292 throws CQLParseException, IOException {
293 debug("in match(" + lexer.render(token, true) + ")");
294 if (lexer.ttype != token)
295 throw new CQLParseException("expected " +
296 lexer.render(token, true) +
297 ", " + "got " + lexer.render());
// tmp holds nextToken()'s return value; it is used only in the debug line.
298 int tmp = lexer.nextToken();
299 debug("match() got token=" + lexer.ttype + ", " +
300 "nval=" + lexer.nval + ", sval='" + lexer.sval + "'" +
301 " (tmp=" + tmp + ")");
// Accepts a symbol at the current position: a word, a number, a '"' token
// (presumably a quoted string -- confirm against CQLLexer), or any of the
// keyword tokens listed below. `expected` is a human-readable description
// used only in the error message.
// Throws CQLParseException when the current token is none of those.
// NOTE(review): the statements that consume the token and return the symbol
// are not visible in this excerpt.
304 private String matchSymbol(String expected)
305 throws CQLParseException, IOException {
307 debug("in matchSymbol()");
308 if (lexer.ttype == CQLLexer.TT_WORD ||
309 lexer.ttype == CQLLexer.TT_NUMBER ||
310 lexer.ttype == '"' ||
311 // The following is a complete list of keywords. Because
312 // they're listed here, they can be used unquoted as
313 // indexes, terms, prefix names and prefix identifiers.
314 // ### Instead, we should ask the lexer whether what we
315 // have is a keyword, and let the knowledge reside there.
316 lexer.ttype == CQLLexer.TT_AND ||
317 lexer.ttype == CQLLexer.TT_OR ||
318 lexer.ttype == CQLLexer.TT_NOT ||
319 lexer.ttype == CQLLexer.TT_PROX ||
320 lexer.ttype == CQLLexer.TT_SORTBY) {
// Numbers take their text from the lexer's rendering; everything else uses
// the token's string value.
321 String symbol = (lexer.ttype == CQLLexer.TT_NUMBER) ?
322 lexer.render() : lexer.sval;
327 throw new CQLParseException("expected " + expected + ", " +
328 "got " + lexer.render());
333 * Simple test-harness for the CQLParser class.
335 * Reads a CQL query either from its command-line argument, if
336 * there is one, or standard input otherwise. So these two
337 * invocations are equivalent:
339 * CQLParser 'au=(Kerninghan or Ritchie) and ti=Unix'
340 * echo au=(Kerninghan or Ritchie) and ti=Unix | CQLParser
342 * The test-harness parses the supplied query and renders it as
343 * XCQL, so that both of the invocations above produce the
348 * <value>and</value>
352 * <value>or</value>
354 * <searchClause>
355 * <index>au</index>
357 * <value>=</value>
359 * <term>Kerninghan</term>
360 * </searchClause>
361 * <searchClause>
362 * <index>au</index>
364 * <value>=</value>
366 * <term>Ritchie</term>
367 * </searchClause>
369 * <searchClause>
370 * <index>ti</index>
372 * <value>=</value>
374 * <term>Unix</term>
375 * </searchClause>
380 * CQL version 1.1 (default version 1.2)
382 * Debug mode: extra output written to stderr.
384 * Causes the output to be written in CQL rather than XCQL - that
385 * is, a query equivalent to that which was input, is output. In
386 * effect, the test harness acts as a query canonicaliser.
388 * The input query, either as XCQL [default] or CQL [if the
389 * <TT>-c</TT> option is supplied].
// Command-line test harness: reads option flags, obtains a CQL query from
// the remaining argument or from standard input, parses it with a CQLParser
// and prints the result as XCQL, CQL or PQF depending on the mode.
// NOTE(review): many statements of this method (declarations, option
// handling bodies, exit calls, closing braces) are not visible in this
// excerpt; the comments below cover only what is shown.
391 public static void main (String[] args) {
392 char mode = 'x'; // x=XCQL, c=CQL, p=PQF
// argv is a mutable list used in place of the args array.
395 List<String> argv = new ArrayList<String>();
396 for (int i = 0; i < args.length; i++) {
// The dialect defaults to CQL 1.2; the options are tested in the fixed order
// -1, -d, then -c or -p, each against the first element of argv.
400 int compat = V1POINT2;
401 if (argv.size() > 0 && argv.get(0).equals("-1")) {
406 if (argv.size() > 0 && argv.get(0).equals("-d")) {
411 if (argv.size() > 0 && argv.get(0).equals("-c")) {
414 } else if (argv.size() > 1 && argv.get(0).equals("-p")) {
417 pfile = (String) argv.get(0);
// More than one argument left after option handling is a usage error.
// NOTE(review): the usage text has unbalanced square brackets around the
// -p option.
421 if (argv.size() > 1) {
422 System.err.println("Usage: CQLParser [-1] [-d] [-c] " +
423 "[-p <pqf-properties> [<CQL-query>]");
424 System.err.println("If unspecified, query is read from stdin");
// The query is the single remaining argument when there is exactly one.
// NOTE(review): otherwise it is read from standard input; the else and try
// lines are not visible here.
429 if (argv.size() == 1) {
430 cql = (String) argv.get(0);
// NOTE(review): a single read() into a fixed 10000-byte buffer; nbytes is
// not used below, so new String(bytes) converts the entire buffer,
// including any bytes the read did not fill.
432 byte[] bytes = new byte[10000];
434 // Read in the whole of standard input in one go
435 int nbytes = System.in.read(bytes);
436 } catch (IOException ex) {
437 System.err.println("Can't read query: " + ex.getMessage());
440 cql = new String(bytes);
443 CQLParser parser = new CQLParser(compat);
// Parse failures are reported on stderr with a message per exception type.
446 root = parser.parse(cql);
447 } catch (CQLParseException ex) {
448 System.err.println("Syntax error: " + ex.getMessage());
450 } catch (IOException ex) {
451 System.err.println("Can't compile query: " + ex.getMessage());
// Render the parse tree: toCQL() on one branch, toPQF(config) in mode 'p',
// toXCQL() on the remaining branch.
457 System.out.println(root.toCQL());
458 } else if (mode == 'p') {
// NOTE(review): no close() of this stream is visible in this excerpt, and
// the condition guarding the throw below is not visible either.
459 InputStream f = new FileInputStream(pfile);
461 throw new FileNotFoundException(pfile);
463 Properties config = new Properties();
466 System.out.println(root.toPQF(config));
468 System.out.print(root.toXCQL());
// Rendering failures are reported on stderr, one message per exception type.
470 } catch (IOException ex) {
471 System.err.println("Can't render query: " + ex.getMessage());
473 } catch (UnknownIndexException ex) {
474 System.err.println("Unknown index: " + ex.getMessage());
476 } catch (UnknownRelationException ex) {
477 System.err.println("Unknown relation: " + ex.getMessage());
479 } catch (UnknownRelationModifierException ex) {
480 System.err.println("Unknown relation modifier: " +
483 } catch (UnknownPositionException ex) {
484 System.err.println("Unknown position: " + ex.getMessage());
486 } catch (PQFTranslationException ex) {
487 // We catch all of this class's subclasses, so --
488 throw new Error("can't get a PQFTranslationException");