2 package org.z3950.zing.cql;
3 import java.io.IOException;
4 import java.util.Properties;
5 import java.io.InputStream;
6 import java.io.FileInputStream;
7 import java.io.FileNotFoundException;
9 import java.io.StringReader;
10 import java.util.ArrayList;
11 import java.util.HashSet;
12 import java.util.List;
17 * Compiles CQL strings into parse trees of CQLNode subtypes.
19 * @see <A href="http://zing.z3950.org/cql/index.html"
20 * >http://zing.z3950.org/cql/index.html</A>
22 public class CQLParser {
// Token source for the query being parsed; assigned afresh by parse(Reader).
23 private CQLLexer lexer;
24 private PositionAwareReader par; // active reader with position; par.getPosition() is attached to parse errors
25 private int compat; // CQL dialect in effect: one of V1POINT1, V1POINT2 or V1POINT1SORT
// Extra relation words accepted by isRelation() in addition to the built-in ones.
26 private final Set<String> customRelations = new HashSet<String>();
// Dialect selectors compared against the compat field throughout the parser.
28 public static final int V1POINT1 = 12368;
29 public static final int V1POINT2 = 12369;
30 public static final int V1POINT1SORT = 12370;
// LEXDEBUG is handed to the CQLLexer constructor in parse(Reader).
// NOTE(review): DEBUG presumably gates debug() output; the guarding line is not visible in this listing - confirm.
32 static private boolean DEBUG = false;
33 static private boolean LEXDEBUG = false;
36 * The new parser implements a dialect of CQL specified by the
37 * <tt>compat</tt> argument:
39 * <li>V1POINT1 - CQL version 1.1
41 * <li>V1POINT2 - CQL version 1.2
43 * <li>V1POINT1SORT - CQL version 1.1 but including
44 * <tt>sortby</tt> as specified for CQL 1.2.
48 public CQLParser(int compat) {
// NOTE(review): the remainder of this constructor and the header of the
// no-argument constructor are not visible in this listing; presumably the
// argument is stored in this.compat - confirm against the full source.
53 * The new parser implements CQL 1.2
// Presumably the body of the no-argument constructor: defaults the dialect to CQL 1.2.
56 this.compat = V1POINT2;
// Writes one parser trace line to standard error, prefixed with "PARSEDEBUG: ".
// NOTE(review): the line between the signature and the println is not visible
// here; presumably it tests the DEBUG flag - confirm.
59 private static void debug(String str) {
61 System.err.println("PARSEDEBUG: " + str);
65 * Registers custom relation in this parser. Note that when a custom relation
66 * is registered the parser is no longer strictly compliant with the chosen spec.
68 * @return true if custom relation has not been registered already
70 public boolean registerCustomRelation(String relation) {
// Set.add reports whether the set changed, i.e. whether the relation was not yet registered.
71 return customRelations.add(relation);
75 * Unregisters previously registered custom relation in this instance of the parser.
77 * @return true if the relation had been previously registered
79 public boolean unregisterCustomRelation(String relation) {
// Set.remove reports whether the set contained the relation before the call.
80 return customRelations.remove(relation);
84 * Compiles a CQL query.
86 * The resulting parse tree may be further processed by hand (see
87 * the individual node-types' documentation for details on the
88 * data structure) or, more often, simply rendered out in the
89 * desired form using one of the back-ends. <TT>toCQL()</TT>
90 * returns a decompiled CQL query equivalent to the one that was
91 * compiled in the first place; <TT>toXCQL()</TT> returns an
92 * XML snippet representing the query; and <TT>toPQF()</TT>
93 * returns the query rendered in Index Data's Prefix Query
96 * @param cql The query
97 * @return A CQLNode object which is the root of a parse
98 * tree representing the query. */
// Convenience overload: wraps the query text in a StringReader and delegates to parse(Reader).
99 public CQLNode parse(String cql) throws CQLParseException, IOException {
100 return parse(new StringReader(cql));
104 * Compiles a CQL query.
106 * The resulting parse tree may be further processed by hand (see
107 * the individual node-types' documentation for details on the
108 * data structure) or, more often, simply rendered out in the
109 * desired form using one of the back-ends. <TT>toCQL()</TT>
110 * returns a decompiled CQL query equivalent to the one that was
111 * compiled in the first place; <TT>toXCQL()</TT> returns an
112 * XML snippet representing the query; and <TT>toPQF()</TT>
113 * returns the query rendered in Index Data's Prefix Query
116 * @param cql The query
117 * @return A CQLNode object which is the root of a parse
118 * tree representing the query. */
119 public CQLNode parse(Reader cql)
120 throws CQLParseException, IOException {
// A new reader and lexer are created on every call, replacing those of any earlier parse.
121 par = new PositionAwareReader(cql);
122 lexer = new CQLLexer(par, LEXDEBUG);
// NOTE(review): lines between the lexer creation and this debug call are not
// visible; presumably the first token is read there - confirm.
125 debug("about to parseQuery()");
// Defaults applied to a bare term: index "cql.serverChoice", relation "=" for
// the V1POINT2 dialect and "scr" for the others.
126 CQLNode root = parseTopLevelPrefixes("cql.serverChoice",
127 new CQLRelation(compat == V1POINT2 ? "=" : "scr"));
// Any token left over after a complete query is reported as a syntax error.
128 if (lexer.ttype != CQLLexer.TT_EOF)
129 throw new CQLParseException("junk after end: " + lexer.render(),
// Parses the outermost level of a query: an optional '>' prefix mapping,
// otherwise a query followed by an optional sortby clause.
135 private CQLNode parseTopLevelPrefixes(String index, CQLRelation relation)
136 throws CQLParseException, IOException {
137 debug("top-level prefix mapping");
// A leading '>' introduces a prefix mapping; passing true makes parsePrefix
// come back to this method for the query that follows the mapping.
139 if (lexer.ttype == '>') {
140 return parsePrefix(index, relation, true);
143 CQLNode node = parseQuery(index, relation);
// sortby is only recognised in the V1POINT2 and V1POINT1SORT dialects.
144 if ((compat == V1POINT2 || compat == V1POINT1SORT) &&
145 lexer.ttype == CQLLexer.TT_SORTBY) {
// NOTE(review): lines before the sort node is created are not visible;
// presumably the sortby keyword itself is consumed there - confirm.
149 CQLSortNode sortnode = new CQLSortNode(node);
// Each iteration reads one sort index and its '/' modifiers, until end of input.
150 while (lexer.ttype != CQLLexer.TT_EOF) {
151 String sortindex = matchSymbol("sort index");
152 ModifierSet ms = gatherModifiers(sortindex);
153 sortnode.addSortIndex(ms);
// A sortby clause that yielded no keys is rejected.
156 if (sortnode.keys.size() == 0) {
157 throw new CQLParseException("no sort keys", par.getPosition());
// Parses a term followed by any number of "boolean-operator term" pairs.
// Each new node takes the tree built so far as its left operand, so a chain
// of operators nests to the left.
166 private CQLNode parseQuery(String index, CQLRelation relation)
167 throws CQLParseException, IOException {
168 debug("in parseQuery()");
170 CQLNode term = parseTerm(index, relation);
// The query ends at end of input, at a closing parenthesis or at sortby.
171 while (lexer.ttype != CQLLexer.TT_EOF &&
172 lexer.ttype != ')' &&
173 lexer.ttype != CQLLexer.TT_SORTBY) {
174 if (lexer.ttype == CQLLexer.TT_AND ||
175 lexer.ttype == CQLLexer.TT_OR ||
176 lexer.ttype == CQLLexer.TT_NOT ||
177 lexer.ttype == CQLLexer.TT_PROX) {
// Remember the operator's token type and spelling; the spelling becomes
// the base of the operator's modifier set.
178 int type = lexer.ttype;
179 String val = lexer.sval;
// NOTE(review): the line between these is not visible; presumably the
// operator token is consumed before its modifiers are gathered - confirm.
181 ModifierSet ms = gatherModifiers(val);
182 CQLNode term2 = parseTerm(index, relation);
// Pick the node class from the saved operator type; TT_PROX is the fall-through case.
183 term = ((type == CQLLexer.TT_AND) ? new CQLAndNode(term, term2, ms) :
184 (type == CQLLexer.TT_OR) ? new CQLOrNode (term, term2, ms) :
185 (type == CQLLexer.TT_NOT) ? new CQLNotNode(term, term2, ms) :
186 new CQLProxNode(term, term2, ms));
// Any other token where an operator is expected is a syntax error.
188 throw new CQLParseException("expected boolean, got " +
189 lexer.render(), par.getPosition());
193 debug("no more ops");
// Collects the '/'-introduced modifiers that follow a relation, boolean
// operator or sort index into a ModifierSet whose base is the given string.
197 private ModifierSet gatherModifiers(String base)
198 throws CQLParseException, IOException {
199 debug("in gatherModifiers()");
201 ModifierSet ms = new ModifierSet(base);
// One modifier per '/' token.
// NOTE(review): the line that consumes the '/' is not visible in this listing - confirm.
202 while (lexer.ttype == '/') {
204 if (lexer.ttype != CQLLexer.TT_WORD)
205 throw new CQLParseException("expected modifier, "
206 + "got " + lexer.render(),
// Modifier type names are lower-cased before being stored.
// NOTE(review): toLowerCase() without an explicit Locale uses the default locale - confirm this is intended.
208 String type = lexer.sval.toLowerCase();
210 if (!isSymbolicRelation()) {
211 // It's a simple modifier consisting of type only
212 ms.addModifier(type);
214 // It's a complex modifier of the form type=value
// The comparison symbol is taken from the rendering of the current token.
215 String comparision = lexer.render(lexer.ttype, false);
217 String value = matchSymbol("modifier value");
218 ms.addModifier(type, comparision, value);
// Parses one operand of a query: a parenthesised sub-query, a '>' prefix
// mapping, or a search clause made of words and optional relations.
// NOTE(review): several lines of this method are not visible in this listing
// (loop structure, how the matched word becomes the index, the returns);
// the comments below describe only what the visible lines show.
225 private CQLNode parseTerm(String index, CQLRelation relation)
226 throws CQLParseException, IOException {
227 debug("in parseTerm()");
231 if (lexer.ttype == '(') {
232 debug("parenthesised term");
// The sub-query inherits the index and relation currently in effect.
234 CQLNode expr = parseQuery(index, relation);
237 } else if (lexer.ttype == '>') {
// A prefix mapping inside a term is parsed with topLevel=false.
238 return parsePrefix(index, relation, false);
241 debug("non-parenthesised term");
242 word = matchSymbol("index or term");
// Consecutive plain words that are not relations are joined with single
// spaces into one multi-word value.
243 while (lexer.ttype == CQLLexer.TT_WORD && !isRelation()) {
244 word = word + " " + lexer.sval;
245 match(CQLLexer.TT_WORD);
// A word relation is taken from sval; a symbolic one from the token's rendering.
252 String relstr = (lexer.ttype == CQLLexer.TT_WORD ?
253 lexer.sval : lexer.render(lexer.ttype, false));
254 relation = new CQLRelation(relstr);
256 ModifierSet ms = gatherModifiers(relstr);
258 debug("index='" + index + ", " +
259 "relation='" + relation.toCQL() + "'");
// The search clause is built from the index and relation in effect and the collected word(s).
262 CQLTermNode node = new CQLTermNode(index, relation, word);
263 debug("made term node " + node.toCQL());
// Parses a prefix mapping followed by the query it applies to, and wraps
// both in a CQLPrefixNode.
// NOTE(review): the last parameter (used below as topLevel) and the
// declaration of the local 'name' are not visible in this listing.
267 private CQLNode parsePrefix(String index, CQLRelation relation,
269 throws CQLParseException, IOException {
270 debug("prefix mapping");
274 String identifier = matchSymbol("prefix-name");
// An '=' after the first symbol means a second symbol follows and is read
// as the identifier; presumably the first one then becomes the prefix
// name - the assignment is not visible, confirm.
275 if (lexer.ttype == '=') {
278 identifier = matchSymbol("prefix-identifer");
// The mapped query is parsed at the same level at which the mapping appeared.
280 CQLNode node = topLevel ?
281 parseTopLevelPrefixes(index, relation) :
282 parseQuery(index, relation);
284 return new CQLPrefixNode(name, identifier, node);
// Tests whether the current token can act as a relation, either a word
// relation or a symbolic one.
287 private boolean isRelation() {
288 debug("isRelation: checking ttype=" + lexer.ttype +
289 " (" + lexer.render() + ")");
// Word relations: any word containing '.', the fixed words any/all/within/
// encloses, "exact" and "scr" in every dialect except V1POINT2, "adj" only in
// V1POINT2, and anything added through registerCustomRelation().
290 if (lexer.ttype == CQLLexer.TT_WORD &&
291 (lexer.sval.indexOf('.') >= 0 ||
292 lexer.sval.equals("any") ||
293 lexer.sval.equals("all") ||
294 lexer.sval.equals("within") ||
295 lexer.sval.equals("encloses") ||
296 (lexer.sval.equals("exact") && compat != V1POINT2) ||
297 (lexer.sval.equals("scr") && compat != V1POINT2) ||
298 (lexer.sval.equals("adj") && compat == V1POINT2) ||
299 customRelations.contains(lexer.sval)))
// NOTE(review): the statement run when the test above succeeds is not visible;
// presumably it returns true - confirm.
// All other tokens are decided by the symbolic check.
302 return isSymbolicRelation();
// Tests whether the current token is one of the symbolic comparison tokens:
// '<', '>', '=', or the lexer's TT_LE, TT_GE, TT_NE and TT_EQEQ token types.
305 private boolean isSymbolicRelation() {
306 debug("isSymbolicRelation: checking ttype=" + lexer.ttype +
307 " (" + lexer.render() + ")");
308 return (lexer.ttype == '<' ||
309 lexer.ttype == '>' ||
310 lexer.ttype == '=' ||
311 lexer.ttype == CQLLexer.TT_LE ||
312 lexer.ttype == CQLLexer.TT_GE ||
313 lexer.ttype == CQLLexer.TT_NE ||
314 lexer.ttype == CQLLexer.TT_EQEQ);
// Requires the current token to have the given type, then advances the lexer
// to the next token. A mismatch raises CQLParseException naming both the
// expected and the actual token.
317 private void match(int token)
318 throws CQLParseException, IOException {
319 debug("in match(" + lexer.render(token, true) + ")");
320 if (lexer.ttype != token)
321 throw new CQLParseException("expected " +
322 lexer.render(token, true) +
323 ", " + "got " + lexer.render(),
// The value returned by nextToken() is kept only so the trace below can print it.
325 int tmp = lexer.nextToken();
326 debug("match() got token=" + lexer.ttype + ", " +
327 "nval=" + lexer.nval + ", sval='" + lexer.sval + "'" +
328 " (tmp=" + tmp + ")");
// Accepts a word, number, quoted string or keyword as a symbol and yields
// its text; any other token raises CQLParseException, with the 'expected'
// argument naming what the caller was looking for.
331 private String matchSymbol(String expected)
332 throws CQLParseException, IOException {
334 debug("in matchSymbol()");
335 if (lexer.ttype == CQLLexer.TT_WORD ||
336 lexer.ttype == CQLLexer.TT_NUMBER ||
337 lexer.ttype == '"' ||
338 // The following is a complete list of keywords. Because
339 // they're listed here, they can be used unquoted as
340 // indexes, terms, prefix names and prefix identifiers.
341 // ### Instead, we should ask the lexer whether what we
342 // have is a keyword, and let the knowledge reside there.
343 lexer.ttype == CQLLexer.TT_AND ||
344 lexer.ttype == CQLLexer.TT_OR ||
345 lexer.ttype == CQLLexer.TT_NOT ||
346 lexer.ttype == CQLLexer.TT_PROX ||
347 lexer.ttype == CQLLexer.TT_SORTBY) {
// Numbers are taken from the lexer's rendering of the token; everything else from sval.
348 String symbol = (lexer.ttype == CQLLexer.TT_NUMBER) ?
349 lexer.render() : lexer.sval;
// NOTE(review): the lines that consume the token and return the symbol are
// not visible in this listing - confirm.
354 throw new CQLParseException("expected " + expected + ", " +
355 "got " + lexer.render(), par.getPosition());
360 * Simple test-harness for the CQLParser class.
362 * Reads a CQL query either from its command-line argument, if
363 * there is one, or standard input otherwise. So these two
364 * invocations are equivalent:
366 * CQLParser 'au=(Kerninghan or Ritchie) and ti=Unix'
367 * echo au=(Kerninghan or Ritchie) and ti=Unix | CQLParser
369 * The test-harness parses the supplied query and renders it as
370 * XCQL, so that both of the invocations above produce the
375 * <value>and</value>
379 * <value>or</value>
381 * <searchClause>
382 * <index>au</index>
384 * <value>=</value>
386 * <term>Kerninghan</term>
387 * </searchClause>
388 * <searchClause>
389 * <index>au</index>
391 * <value>=</value>
393 * <term>Ritchie</term>
394 * </searchClause>
396 * <searchClause>
397 * <index>ti</index>
399 * <value>=</value>
401 * <term>Unix</term>
402 * </searchClause>
407 * CQL version 1.1 (default version 1.2)
409 * Debug mode: extra output written to stderr.
411 * Causes the output to be written in CQL rather than XCQL - that
412 * is, a query equivalent to that which was input, is output. In
413 * effect, the test harness acts as a query canonicaliser.
415 * The input query, either as XCQL [default] or CQL [if the
416 * <TT>-c</TT> option is supplied].
// Command-line test harness: parses one query and prints it as XCQL, CQL or PQF.
// NOTE(review): many lines of this method are not visible in this listing
// (option handling bodies, exits, try headers); the comments below describe
// only what the visible lines show.
418 public static void main (String[] args) {
419 char mode = 'x'; // x=XCQL, c=CQL, p=PQF
// NOTE(review): the loop body is not visible; presumably it copies args into argv - confirm.
422 List<String> argv = new ArrayList<String>();
423 for (int i = 0; i < args.length; i++) {
// Options are recognised only at the front of the list and only in the order -1, -d, then -c or -p.
427 int compat = V1POINT2;
428 if (argv.size() > 0 && argv.get(0).equals("-1")) {
433 if (argv.size() > 0 && argv.get(0).equals("-d")) {
438 if (argv.size() > 0 && argv.get(0).equals("-c")) {
// -p is only recognised when at least one more argument follows it.
441 } else if (argv.size() > 1 && argv.get(0).equals("-p")) {
444 pfile = (String) argv.get(0);
// More than one remaining argument is a usage error.
// NOTE(review): the usage text has an unbalanced '[' before "-p".
448 if (argv.size() > 1) {
449 System.err.println("Usage: CQLParser [-1] [-d] [-c] " +
450 "[-p <pqf-properties> [<CQL-query>]");
451 System.err.println("If unspecified, query is read from stdin");
// The query is the single remaining argument, or otherwise standard input.
456 if (argv.size() == 1) {
457 cql = (String) argv.get(0);
// NOTE(review): a single read() into a fixed 10000-byte buffer may return
// fewer bytes than are available, and nbytes is not used in the visible lines.
459 byte[] bytes = new byte[10000];
461 // Read in the whole of standard input in one go
462 int nbytes = System.in.read(bytes);
463 } catch (IOException ex) {
464 System.err.println("Can't read query: " + ex.getMessage());
// NOTE(review): this converts the entire buffer, not just the bytes read,
// using the platform default charset - unread trailing bytes end up in the query.
467 cql = new String(bytes);
470 CQLParser parser = new CQLParser(compat);
// The harness always accepts "@" as an additional relation.
471 parser.registerCustomRelation("@");
474 root = parser.parse(cql);
475 } catch (CQLParseException ex) {
476 System.err.println("Syntax error: " + ex.getMessage());
478 } catch (IOException ex) {
479 System.err.println("Can't compile query: " + ex.getMessage());
// Render the parse tree in the selected output mode.
485 System.out.println(root.toCQL());
486 } else if (mode == 'p') {
// NOTE(review): no close() of this stream is visible in the listing - confirm it is released.
487 InputStream f = new FileInputStream(pfile);
489 throw new FileNotFoundException(pfile);
491 Properties config = new Properties();
494 System.out.println(root.toPQF(config));
496 System.out.print(root.toXCQL());
498 } catch (IOException ex) {
499 System.err.println("Can't render query: " + ex.getMessage());
501 } catch (UnknownIndexException ex) {
502 System.err.println("Unknown index: " + ex.getMessage());
504 } catch (UnknownRelationException ex) {
505 System.err.println("Unknown relation: " + ex.getMessage());
507 } catch (UnknownRelationModifierException ex) {
508 System.err.println("Unknown relation modifier: " +
511 } catch (UnknownPositionException ex) {
512 System.err.println("Unknown position: " + ex.getMessage());
514 } catch (PQFTranslationException ex) {
515 // We catch all of this class's subclasses, so --
516 throw new Error("can't get a PQFTranslationException");