-// $Id: CQLParser.java,v 1.6 2002-10-25 16:11:05 mike Exp $
+// $Id: CQLParser.java,v 1.7 2002-10-25 16:56:43 mike Exp $
package org.z3950.zing.cql;
import java.util.Properties;
import java.io.InputStream;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StringReader;
import java.io.StreamTokenizer;
* Compiles a CQL string into a parse tree ...
* ###
*
- * @version $Id: CQLParser.java,v 1.6 2002-10-25 16:11:05 mike Exp $
+ * @version $Id: CQLParser.java,v 1.7 2002-10-25 16:56:43 mike Exp $
* @see <A href="http://zing.z3950.org/cql/index.html"
* >http://zing.z3950.org/cql/index.html</A>
*/
}
public CQLNode parse(String cql)
- throws FileNotFoundException, IOException {
+ throws CQLParseException, IOException {
this.cql = cql;
st = new StreamTokenizer(new StringReader(cql));
- // ### these settings are wrong
- st.wordChars('/', '/');
- st.wordChars('0', '9'); // ### but 1 is still recognised as TT_NUM
- st.wordChars('.', '.');
- st.wordChars('-', '-');
st.ordinaryChar('=');
- st.ordinaryChar(',');
+ st.ordinaryChar('<');
+ st.ordinaryChar('>');
+ st.ordinaryChar('/');
st.ordinaryChar('(');
st.ordinaryChar(')');
-// int token;
-// while ((token = st.nextToken()) != st.TT_EOF) {
-// System.out.println("token=" + token + ", " +
-// "nval=" + st.nval + ", " +
-// "sval=" + st.sval);
-// }
-
- st.nextToken();
- CQLNode root;
- try {
- root = parse_expression();
- } catch (CQLParseException ex) {
- System.err.println("### Oops: " + ex);
- return null;
+ if (false) {
+ // Lexical debug
+ int token;
+ while ((token = st.nextToken()) != st.TT_EOF) {
+ System.out.println("token=" + token + ", " +
+ "nval=" + st.nval + ", " +
+ "sval=" + st.sval);
+ }
+ System.exit(0);
}
- if (st.ttype != st.TT_EOF) {
- System.err.println("### Extra bits: " + render(st));
- return null;
- }
+ st.nextToken();
+ System.err.println("*about to parse_query()");
+ CQLNode root = parse_query();
+ if (st.ttype != st.TT_EOF)
+ throw new CQLParseException("junk after end: " + render(st));
return root;
}
- private CQLNode parse_expression()
+ private CQLNode parse_query()
throws CQLParseException, IOException {
- CQLNode term = parse_term();
+ System.err.println("*in parse_query()");
+ CQLNode term = parse_term();
while (st.ttype == st.TT_WORD) {
String op = st.sval.toLowerCase();
+ System.err.println("*checking op '" + op + "'");
if (st.sval.equals("and")) {
match(st.TT_WORD);
CQLNode term2 = parse_term();
CQLNode term2 = parse_term();
term = new CQLNotNode(term, term2);
}
+ // ### Need to handle "prox"
}
+ System.err.println("*no more ops");
return term;
}
private CQLNode parse_term()
throws CQLParseException, IOException {
+ System.err.println("*in parse_term()");
if (st.ttype == '(') {
match('(');
- CQLNode expr = parse_expression();
+ CQLNode expr = parse_query();
match(')');
return expr;
}
+ System.err.println("*not a parenthesised term");
+ // ### Need to parse qualifier-relation pairs
String word = st.sval;
- return new CQLTermNode("x", "=", word);
+ match(st.ttype);
+ CQLTermNode node = new CQLTermNode("x", "=", word);
+ System.err.println("*made term node " + node);
+ return node;
}
private void match(int token)
throws CQLParseException, IOException {
+ System.err.println("*in match(" + render(st, token, null) + ")");
if (st.ttype != token)
throw new CQLParseException("expected " + render(st, token, null) +
", " + "got " + render(st));
} else if (token == st.TT_EOL) {
return "EOL";
} else if (token == st.TT_NUMBER) {
- return "number";
+ return "number: " + st.nval;
} else if (token == st.TT_WORD) {
- return "word";
- } else if (token == '"' && token == '\'') {
- return "string";
+ return "word: \"" + st.sval + "\"";
+ } else if (token == '"' || token == '\'') {
+ return "string: \"" + st.sval + "\"";
}
return "'" + String.valueOf((char) token) + "'";
System.exit(1);
}
- byte[] bytes = new byte[1000];
+ byte[] bytes = new byte[10000];
try {
+ // Read in the whole of standard input in one go
int nbytes = System.in.read(bytes);
} catch (java.io.IOException ex) {
System.err.println("Can't read query: " + ex);
CQLNode root;
try {
root = parser.parse(cql);
+ System.err.println("root='" + root + "'");
System.out.println(root.toXCQL(0));
+ } catch (CQLParseException ex) {
+ System.err.println("Syntax error: " + ex);
+ System.exit(3);
} catch (java.io.IOException ex) {
System.err.println("Can't compile query: " + ex);
- System.exit(3);
+ System.exit(4);
}
}
}
-// $Id: CQLTermNode.java,v 1.2 2002-10-25 16:01:26 mike Exp $
+// $Id: CQLTermNode.java,v 1.3 2002-10-25 16:56:43 mike Exp $
package org.z3950.zing.cql;
* Represents a terminal node in a CQL parse-tree ...
* ###
*
- * @version $Id: CQLTermNode.java,v 1.2 2002-10-25 16:01:26 mike Exp $
+ * @version $Id: CQLTermNode.java,v 1.3 2002-10-25 16:56:43 mike Exp $
*/
public class CQLTermNode extends CQLNode {
private String qualifier;
private String relation;
- private String value;
+ private String term;
- public CQLTermNode(String qualifier, String relation, String value) {
+ public CQLTermNode(String qualifier, String relation, String term) {
this.qualifier = qualifier;
this.relation = relation;
- this.value = value;
+ this.term = term;
}
String toXCQL(int level) {
return (indent(level) + "<searchClause>\n" +
indent(level+1) + "<index>" + qualifier + "<index>\n" +
indent(level+1) + "<relation>" + relation + "<relation>\n" +
- indent(level+1) + "<term>" + value + "<term>\n" +
+ indent(level+1) + "<term>" + term + "<term>\n" +
indent(level) + "</searchClause>\n");
}
String toCQL() {
- String res = value;
+ String quotedTerm = term;
- if (res.indexOf('"') != -1) {
+ if (quotedTerm.indexOf('"') != -1) {
// ### precede each '"' with a '/'
}
- if (res.indexOf('"') != -1 ||
- res.indexOf(' ') != -1 ||
- res.indexOf('\t') != -1 ||
- res.indexOf('=') != -1 ||
- res.indexOf('<') != -1 ||
- res.indexOf('>') != -1 ||
- res.indexOf('/') != -1 ||
- res.indexOf('(') != -1 ||
- res.indexOf(')') != -1) {
- res = '"' + res + '"';
+ // ### There must be a better way ...
+ if (quotedTerm.indexOf('"') != -1 ||
+ quotedTerm.indexOf(' ') != -1 ||
+ quotedTerm.indexOf('\t') != -1 ||
+ quotedTerm.indexOf('=') != -1 ||
+ quotedTerm.indexOf('<') != -1 ||
+ quotedTerm.indexOf('>') != -1 ||
+ quotedTerm.indexOf('/') != -1 ||
+ quotedTerm.indexOf('(') != -1 ||
+ quotedTerm.indexOf(')') != -1) {
+ quotedTerm = '"' + quotedTerm + '"';
}
// ### The qualifier may need quoting.
// ### We don't always need spaces around `relation'.
- return qualifier + " " + relation + " " + value;
+ return qualifier + " " + relation + " " + quotedTerm;
}
}