1 // $Id: CQLParser.java,v 1.39 2007-08-06 15:54:48 mike Exp $
3 package org.z3950.zing.cql;
4 import java.io.IOException;
5 import java.util.Properties;
6 import java.io.InputStream;
7 import java.io.FileInputStream;
8 import java.io.FileNotFoundException;
9 import java.util.ArrayList;
10 import java.util.List;
14 * Compiles CQL strings into parse trees of CQLNode subtypes.
16 * @version $Id: CQLParser.java,v 1.39 2007-08-06 15:54:48 mike Exp $
17 * @see <A href="http://zing.z3950.org/cql/index.html"
18 * >http://zing.z3950.org/cql/index.html</A>
20 public class CQLParser {
// Lexer for the query being parsed; parse() assigns a new CQLLexer to it on each call.
21 private CQLLexer lexer;
// CQL dialect implemented by this parser; compared against the V1POINT* constants below.
22 private int compat; // One of V1POINT1, V1POINT2 or V1POINT1SORT
// Dialect selectors accepted by the CQLParser(int) constructor.
23 public static final int V1POINT1 = 12368;
24 public static final int V1POINT2 = 12369;
25 public static final int V1POINT1SORT = 12370;
// Debug switch for the parser. NOTE(review): presumably consulted by debug() -- confirm.
27 static private boolean DEBUG = false;
// Debug switch handed to the CQLLexer constructor in parse().
28 static private boolean LEXDEBUG = false;
31 * The new parser implements a dialect of CQL specified by the
32 * <tt>compat</tt> argument:
34 * <li>V1POINT1 - CQL version 1.1
36 * <li>V1POINT2 - CQL version 1.2
38 * <li>V1POINT1SORT - CQL version 1.1 but including
39 * <tt>sortby</tt> as specified for CQL 1.2.
// Constructor taking the dialect selector: V1POINT1, V1POINT2 or V1POINT1SORT.
43 public CQLParser(int compat) {
48 * The new parser implements CQL 1.2
// Default dialect: CQL 1.2.
51 this.compat = V1POINT2;
// Writes a parser trace message to standard error, tagged "PARSEDEBUG: ".
// NOTE(review): presumably only active when DEBUG is set -- confirm.
54 private static void debug(String str) {
56 System.err.println("PARSEDEBUG: " + str);
60 * Compiles a CQL query.
62 * The resulting parse tree may be further processed by hand (see
63 * the individual node-types' documentation for details on the
64 * data structure) or, more often, simply rendered out in the
65 * desired form using one of the back-ends. <TT>toCQL()</TT>
66 * returns a decompiled CQL query equivalent to the one that was
67 * compiled in the first place; <TT>toXCQL()</TT> returns an
68 * XML snippet representing the query; and <TT>toPQF()</TT>
69 * returns the query rendered in Index Data's Prefix Query
72 * @param cql The query
73 * @return A CQLNode object which is the root of a parse
74 * tree representing the query. */
// Entry point: builds a lexer over the query text and parses it with the
// default index "cql.serverChoice". The default relation is "=" for the
// V1POINT2 dialect and "scr" for the other dialects.
// Throws CQLParseException when tokens remain after the top-level query.
75 public CQLNode parse(String cql)
76 throws CQLParseException, IOException {
// A new lexer is created over the query text on every call.
77 lexer = new CQLLexer(cql, LEXDEBUG);
80 debug("about to parseQuery()");
81 CQLNode root = parseTopLevelPrefixes("cql.serverChoice",
82 new CQLRelation(compat == V1POINT2 ? "=" : "scr"));
// Anything left over once the top-level query has been consumed is a syntax error.
83 if (lexer.ttype != lexer.TT_EOF)
84 throw new CQLParseException("junk after end: " + lexer.render());
// Parses a whole query: either a prefix mapping introduced by '>' or a
// boolean query, which may be followed by a sort specification.
// 'index' and 'relation' are the defaults passed down to the sub-parsers.
89 private CQLNode parseTopLevelPrefixes(String index, CQLRelation relation)
90 throws CQLParseException, IOException {
91 debug("top-level prefix mapping");
// A leading '>' introduces a prefix mapping; the final argument is true here
// and false when parsePrefix is called from parseTerm.
93 if (lexer.ttype == '>') {
94 return parsePrefix(index, relation, true);
97 CQLNode node = parseQuery(index, relation);
// Sort specifications are recognised only in the V1POINT2 and V1POINT1SORT dialects.
98 if ((compat == V1POINT2 || compat == V1POINT1SORT) &&
99 lexer.ttype == lexer.TT_SORTBY) {
// Wrap the parsed query in a sort node and read sort keys until end of input.
103 CQLSortNode sortnode = new CQLSortNode(node);
104 while (lexer.ttype != lexer.TT_EOF) {
// Each key is a symbol followed by whatever modifiers gatherModifiers() collects.
105 String sortindex = matchSymbol("sort index");
106 ModifierSet ms = gatherModifiers(sortindex);
107 sortnode.addSortIndex(ms);
// A sort clause without any keys is rejected.
110 if (sortnode.keys.size() == 0) {
111 throw new CQLParseException("no sort keys");
// Parses a sequence of terms joined by the boolean operators and, or, not
// and prox. The tree is built left to right: each new operator node takes
// the tree so far as its first operand and the newly parsed term as its second.
120 private CQLNode parseQuery(String index, CQLRelation relation)
121 throws CQLParseException, IOException {
122 debug("in parseQuery()");
124 CQLNode term = parseTerm(index, relation);
// Continue until end of input, a closing parenthesis or a sortby keyword.
125 while (lexer.ttype != lexer.TT_EOF &&
126 lexer.ttype != ')' &&
127 lexer.ttype != lexer.TT_SORTBY) {
128 if (lexer.ttype == lexer.TT_AND ||
129 lexer.ttype == lexer.TT_OR ||
130 lexer.ttype == lexer.TT_NOT ||
131 lexer.ttype == lexer.TT_PROX) {
// Remember the operator's token type and text before its modifiers are gathered.
132 int type = lexer.ttype;
133 String val = lexer.sval;
// The operator text serves as the base of the modifier set.
135 ModifierSet ms = gatherModifiers(val);
136 CQLNode term2 = parseTerm(index, relation);
// Pick the node class from the saved token type; prox is the fall-through case.
137 term = ((type == lexer.TT_AND) ? new CQLAndNode(term, term2, ms) :
138 (type == lexer.TT_OR) ? new CQLOrNode (term, term2, ms) :
139 (type == lexer.TT_NOT) ? new CQLNotNode(term, term2, ms) :
140 new CQLProxNode(term, term2, ms));
142 throw new CQLParseException("expected boolean, got " +
147 debug("no more ops");
// Collects slash-introduced modifiers into a ModifierSet built on 'base'.
// A modifier is either a bare type or a type followed by a symbolic
// relation and a value.
// Throws CQLParseException when the token checked for the modifier type is not a word.
151 private ModifierSet gatherModifiers(String base)
152 throws CQLParseException, IOException {
153 debug("in gatherModifiers()");
155 ModifierSet ms = new ModifierSet(base);
// Every modifier is introduced by a slash.
156 while (lexer.ttype == '/') {
158 if (lexer.ttype != lexer.TT_WORD)
159 throw new CQLParseException("expected modifier, "
160 + "got " + lexer.render());
// Modifier types are lower-cased before being stored.
161 String type = lexer.sval.toLowerCase();
163 if (!isSymbolicRelation()) {
164 // It's a simple modifier consisting of type only
165 ms.addModifier(type);
167 // It's a complex modifier of the form type=value
// The comparison operator is taken from the lexer's rendering of the current token type.
168 String comparision = lexer.render(lexer.ttype, false);
170 String value = matchSymbol("modifier value");
171 ms.addModifier(type, comparision, value);
// Parses a single term: a parenthesised sub-query, a prefix mapping
// introduced by '>', or a search clause built from one or more words.
// 'index' and 'relation' are the values in force for this term; the visible
// code replaces 'relation' when the clause supplies its own.
178 private CQLNode parseTerm(String index, CQLRelation relation)
179 throws CQLParseException, IOException {
180 debug("in parseTerm()");
184 if (lexer.ttype == '(') {
185 debug("parenthesised term");
// The parenthesised content is parsed as a full boolean query.
187 CQLNode expr = parseQuery(index, relation);
190 } else if (lexer.ttype == '>') {
// Nested prefix mapping: the final argument is false, unlike the top-level call.
191 return parsePrefix(index, relation, false);
194 debug("non-parenthesised term");
195 word = matchSymbol("index or term");
// Consecutive word tokens that are not relations are joined with single spaces.
196 while (lexer.ttype == lexer.TT_WORD && !isRelation()) {
197 word = word + " " + lexer.sval;
198 match(lexer.TT_WORD);
// A word-form relation is taken from its text; a symbolic one is rendered from its token type.
205 String relstr = (lexer.ttype == lexer.TT_WORD ?
206 lexer.sval : lexer.render(lexer.ttype, false));
207 relation = new CQLRelation(relstr);
// Modifiers are gathered with the relation string as their base.
209 ModifierSet ms = gatherModifiers(relstr);
211 debug("index='" + index + ", " +
212 "relation='" + relation.toCQL() + "'");
// The term node combines the index, the relation and the accumulated word text.
215 CQLTermNode node = new CQLTermNode(index, relation, word);
216 debug("made term node " + node.toCQL());
// Parses a prefix mapping together with the query it applies to and
// returns them wrapped in a CQLPrefixNode.
220 private CQLNode parsePrefix(String index, CQLRelation relation,
222 throws CQLParseException, IOException {
223 debug("prefix mapping");
227 String identifier = matchSymbol("prefix-name");
// When '=' follows the first symbol, a further symbol is read into identifier.
228 if (lexer.ttype == '=') {
// NOTE(review): the label below is spelled "prefix-identifer"; it is runtime
// text that appears in error messages, so it is left unchanged here.
231 identifier = matchSymbol("prefix-identifer");
// The mapped query is parsed by parseTopLevelPrefixes or parseQuery, depending on topLevel.
233 CQLNode node = topLevel ?
234 parseTopLevelPrefixes(index, relation) :
235 parseQuery(index, relation);
237 return new CQLPrefixNode(name, identifier, node);
// Tests whether the current token can act as a relation. Word tokens are
// checked for a '.' or one of the named relations below; symbolic
// relations are checked through isSymbolicRelation().
240 private boolean isRelation() {
241 debug("isRelation: checking ttype=" + lexer.ttype +
242 " (" + lexer.render() + ")");
// "scr" is accepted in every dialect except V1POINT2; "adj" only in V1POINT2.
243 if (lexer.ttype == lexer.TT_WORD &&
244 (lexer.sval.indexOf('.') >= 0 ||
245 lexer.sval.equals("any") ||
246 lexer.sval.equals("all") ||
247 lexer.sval.equals("within") ||
248 lexer.sval.equals("encloses") ||
249 lexer.sval.equals("exact") ||
250 (lexer.sval.equals("scr") && compat != V1POINT2) ||
251 (lexer.sval.equals("adj") && compat == V1POINT2)))
254 return isSymbolicRelation();
// Reports whether the current token is one of the symbolic relations:
// '<', '>', '=', or the lexer's TT_LE, TT_GE, TT_NE and TT_EQEQ tokens.
257 private boolean isSymbolicRelation() {
258 debug("isSymbolicRelation: checking ttype=" + lexer.ttype +
259 " (" + lexer.render() + ")");
260 return (lexer.ttype == '<' ||
261 lexer.ttype == '>' ||
262 lexer.ttype == '=' ||
263 lexer.ttype == lexer.TT_LE ||
264 lexer.ttype == lexer.TT_GE ||
265 lexer.ttype == lexer.TT_NE ||
266 lexer.ttype == lexer.TT_EQEQ);
// Checks that the current token has the given type, then advances the lexer.
// Throws CQLParseException, naming the expected and the actual token, on a mismatch.
269 private void match(int token)
270 throws CQLParseException, IOException {
271 debug("in match(" + lexer.render(token, true) + ")");
272 if (lexer.ttype != token)
273 throw new CQLParseException("expected " +
274 lexer.render(token, true) +
275 ", " + "got " + lexer.render());
// Advance to the next token; the value returned by nextToken() is used only in the trace below.
276 int tmp = lexer.nextToken();
277 debug("match() got token=" + lexer.ttype + ", " +
278 "nval=" + lexer.nval + ", sval='" + lexer.sval + "'" +
279 " (tmp=" + tmp + ")");
// Accepts the current token as a symbol when it is a word, a number, a
// double-quote token (presumably a quoted string -- confirm against the
// lexer) or one of the keywords and, or, not, prox and sortby.
// 'expected' describes the wanted symbol and appears in the error message.
// Throws CQLParseException for any other token type.
282 private String matchSymbol(String expected)
283 throws CQLParseException, IOException {
285 debug("in matchSymbol()");
286 if (lexer.ttype == lexer.TT_WORD ||
287 lexer.ttype == lexer.TT_NUMBER ||
288 lexer.ttype == '"' ||
289 // The following is a complete list of keywords. Because
290 // they're listed here, they can be used unquoted as
291 // indexes, terms, prefix names and prefix identifiers.
292 // ### Instead, we should ask the lexer whether what we
293 // have is a keyword, and let the knowledge reside there.
294 lexer.ttype == lexer.TT_AND ||
295 lexer.ttype == lexer.TT_OR ||
296 lexer.ttype == lexer.TT_NOT ||
297 lexer.ttype == lexer.TT_PROX ||
298 lexer.ttype == lexer.TT_SORTBY) {
// Numbers are taken from the lexer's rendering of the token; everything else from sval.
299 String symbol = (lexer.ttype == lexer.TT_NUMBER) ?
300 lexer.render() : lexer.sval;
305 throw new CQLParseException("expected " + expected + ", " +
306 "got " + lexer.render());
311 * Simple test-harness for the CQLParser class.
313 * Reads a CQL query either from its command-line argument, if
314 * there is one, or standard input otherwise. So these two
315 * invocations are equivalent:
317 * CQLParser 'au=(Kerninghan or Ritchie) and ti=Unix'
318 * echo au=(Kerninghan or Ritchie) and ti=Unix | CQLParser
320 * The test-harness parses the supplied query and renders it as
321 * XCQL, so that both of the invocations above produce the
326 * <value>and</value>
330 * <value>or</value>
332 * <searchClause>
333 * <index>au</index>
335 * <value>=</value>
337 * <term>Kerninghan</term>
338 * </searchClause>
339 * <searchClause>
340 * <index>au</index>
342 * <value>=</value>
344 * <term>Ritchie</term>
345 * </searchClause>
347 * <searchClause>
348 * <index>ti</index>
350 * <value>=</value>
352 * <term>Unix</term>
353 * </searchClause>
358 * CQL version 1.1 (default version 1.2)
360 * Debug mode: extra output written to stderr.
362 * Causes the output to be written in CQL rather than XCQL - that
363 * is, a query equivalent to that which was input, is output. In
364 * effect, the test harness acts as a query canonicaliser.
366 * The input query, either as XCQL [default] or CQL [if the
367 * <TT>-c</TT> option is supplied].
// Command-line test harness: parses one query and prints it as XCQL, CQL
// or PQF, reporting failures on standard error.
369 public static void main (String[] args) {
370 char mode = 'x'; // x=XCQL, c=CQL, p=PQF
// argv is the list that the option checks below inspect.
373 List<String> argv = new ArrayList<String>();
374 for (int i = 0; i < args.length; i++) {
// The dialect defaults to CQL 1.2.
378 int compat = V1POINT2;
// Each option test looks only at the first element of argv, so the
// options are position-sensitive: -1, then -d, then -c or -p.
379 if (argv.size() > 0 && argv.get(0).equals("-1")) {
384 if (argv.size() > 0 && argv.get(0).equals("-d")) {
389 if (argv.size() > 0 && argv.get(0).equals("-c")) {
// -p is only recognised when at least one more argument follows it.
392 } else if (argv.size() > 1 && argv.get(0).equals("-p")) {
395 pfile = (String) argv.get(0);
// More than one argument at this point is a usage error.
// NOTE(review): the brackets in the usage text below are unbalanced
// ("[-p <pqf-properties> [<CQL-query>]").
399 if (argv.size() > 1) {
400 System.err.println("Usage: CQLParser [-1] [-d] [-c] " +
401 "[-p <pqf-properties> [<CQL-query>]");
402 System.err.println("If unspecified, query is read from stdin");
// A single argument at this point is taken as the query.
407 if (argv.size() == 1) {
408 cql = (String) argv.get(0);
// NOTE(review): the query is read with one read() call into a fixed
// 10000-byte buffer; nbytes is not used in the visible code, and
// new String(bytes) converts the whole buffer using the platform default
// charset -- confirm that truncation and padding are acceptable.
410 byte[] bytes = new byte[10000];
412 // Read in the whole of standard input in one go
413 int nbytes = System.in.read(bytes);
414 } catch (IOException ex) {
415 System.err.println("Can't read query: " + ex.getMessage());
418 cql = new String(bytes);
// Parse with the selected dialect; syntax errors and I/O errors are reported separately.
421 CQLParser parser = new CQLParser(compat);
424 root = parser.parse(cql);
425 } catch (CQLParseException ex) {
426 System.err.println("Syntax error: " + ex.getMessage());
428 } catch (IOException ex) {
429 System.err.println("Can't compile query: " + ex.getMessage());
// Render according to mode: toCQL() for 'c', toPQF(config) for 'p', toXCQL(0) otherwise.
435 System.out.println(root.toCQL());
436 } else if (mode == 'p') {
// NOTE(review): new FileInputStream(...) throws FileNotFoundException itself
// when the file cannot be opened.
437 InputStream f = new FileInputStream(pfile);
439 throw new FileNotFoundException(pfile);
441 Properties config = new Properties();
444 System.out.println(root.toPQF(config));
446 System.out.print(root.toXCQL(0));
// Each rendering failure gets its own message on standard error.
448 } catch (IOException ex) {
449 System.err.println("Can't render query: " + ex.getMessage());
451 } catch (UnknownIndexException ex) {
452 System.err.println("Unknown index: " + ex.getMessage());
454 } catch (UnknownRelationException ex) {
455 System.err.println("Unknown relation: " + ex.getMessage());
457 } catch (UnknownRelationModifierException ex) {
458 System.err.println("Unknown relation modifier: " +
461 } catch (UnknownPositionException ex) {
462 System.err.println("Unknown position: " + ex.getMessage());
464 } catch (PQFTranslationException ex) {
465 // We catch all of this class's subclasses, so --
466 throw new Error("can't get a PQFTranslationException");