diff --git a/src/org/z3950/zing/cql/CQLLexer.java b/src/org/z3950/zing/cql/CQLLexer.java
index 1dc580e..2cb0369 100644
--- a/src/org/z3950/zing/cql/CQLLexer.java
+++ b/src/org/z3950/zing/cql/CQLLexer.java
@@ -1,35 +1,92 @@
-// $Id: CQLLexer.java,v 1.1 2002-10-30 09:19:26 mike Exp $
+// $Id: CQLLexer.java,v 1.6 2002-11-17 23:29:02 mike Exp $
 
 package org.z3950.zing.cql;
 import java.io.StreamTokenizer;
 import java.io.StringReader;
+import java.util.Hashtable;
 
-// This is a trivial subclass for java.io.StreamTokenizer which knows
-// about the multi-character tokens "<=", ">=" and "<>", and includes
-// a render() method.  Used only by CQLParser.
+// This is a semi-trivial subclass of java.io.StreamTokenizer that:
+//	* Has a halfDecentPushBack() method that actually works
+//	* Includes a render() method
+//	* Knows about the multi-character tokens "<=", ">=" and "<>"
+//	* Recognises a set of keywords as tokens in their own right
+//	* Includes some primitive debugging-output facilities
+// It's used only by CQLParser.
 //
 class CQLLexer extends StreamTokenizer {
-    private static boolean DEBUG;
-    static int TT_LE = 1000;        // The "<=" relation
-    static int TT_GE = 1001;        // The ">=" relation
-    static int TT_NE = 1002;        // The "<>" relation
-    static int TT_AND = 1003;       // The "and" boolean
-    static int TT_OR = 1004;        // The "or" boolean
-    static int TT_NOT = 1005;       // The "not" boolean
-    static int TT_PROX = 1006;      // The "prox" boolean
-    static int TT_ANY = 1007;       // The "any" relation
-    static int TT_ALL = 1008;       // The "all" relation
-    static int TT_EXACT = 1009;     // The "exact" relation
+    // New publicly visible token-types
+    static int TT_LE = 1000;        // The "<=" relation
+    static int TT_GE = 1001;        // The ">=" relation
+    static int TT_NE = 1002;        // The "<>" relation
+    static int TT_AND = 1003;       // The "and" boolean
+    static int TT_OR = 1004;        // The "or" boolean
+    static int TT_NOT = 1005;       // The "not" boolean
+    static int TT_PROX = 1006;      // The "prox" boolean
+    static int TT_ANY = 1007;       // The "any" relation
+    static int TT_ALL = 1008;       // The "all" relation
+    static int TT_EXACT = 1009;     // The "exact" relation
+    static int TT_pWORD = 1010;     // The "word" proximity unit
+    static int TT_SENTENCE = 1011;  // The "sentence" proximity unit
+    static int TT_PARAGRAPH = 1012; // The "paragraph" proximity unit
+    static int TT_ELEMENT = 1013;   // The "element" proximity unit
+    static int TT_ORDERED = 1014;   // The "ordered" proximity ordering
+    static int TT_UNORDERED = 1015; // The "unordered" proximity ordering
+    static int TT_RELEVANT = 1016;  // The "relevant" relation modifier
+    static int TT_FUZZY = 1017;     // The "fuzzy" relation modifier
+    static int TT_STEM = 1018;      // The "stem" relation modifier
+    static int TT_SCR = 1019;       // The server choice relation
+    static int TT_PHONETIC = 1020;  // The "phonetic" relation modifier
+
+    // Support for keywords.  It would be nice to compile this linear
+    // list into a Hashtable, but it's hard to store ints as hash
+    // values, and next to impossible to use them as hash keys.  So
+    // we'll just scan the (very short) list every time we need to do
+    // a lookup.
+    private class Keyword {
+        int token;
+        String keyword;
+        Keyword(int token, String keyword) {
+            this.token = token;
+            this.keyword = keyword;
+        }
+    }
+    // This should logically be static, but Java won't allow it :-P
+    private Keyword[] keywords = {
+        new Keyword(TT_AND, "and"),
+        new Keyword(TT_OR, "or"),
+        new Keyword(TT_NOT, "not"),
+        new Keyword(TT_PROX, "prox"),
+        new Keyword(TT_ANY, "any"),
+        new Keyword(TT_ALL, "all"),
+        new Keyword(TT_EXACT, "exact"),
+        new Keyword(TT_pWORD, "word"),
+        new Keyword(TT_SENTENCE, "sentence"),
+        new Keyword(TT_PARAGRAPH, "paragraph"),
+        new Keyword(TT_ELEMENT, "element"),
+        new Keyword(TT_ORDERED, "ordered"),
+        new Keyword(TT_UNORDERED, "unordered"),
+        new Keyword(TT_RELEVANT, "relevant"),
+        new Keyword(TT_FUZZY, "fuzzy"),
+        new Keyword(TT_STEM, "stem"),
+        new Keyword(TT_SCR, "scr"),
+        new Keyword(TT_PHONETIC, "phonetic"),
+    };
 
     // For halfDecentPushBack() and the code at the top of nextToken()
     private static int TT_UNDEFINED = -1000;
-    int saved_ttype = TT_UNDEFINED;
-    double saved_nval;
-    String saved_sval;
+    private int saved_ttype = TT_UNDEFINED;
+    private double saved_nval;
+    private String saved_sval;
+
+    // Controls debugging output
+    private static boolean DEBUG;
 
     CQLLexer(String cql, boolean lexdebug) {
 	super(new StringReader(cql));
+	wordChars('!', '?');	// ASCII-dependency!
+	wordChars('[', '`');	// ASCII-dependency!
+	quoteChar('"');
 	ordinaryChar('=');
 	ordinaryChar('<');
 	ordinaryChar('>');
@@ -37,6 +94,7 @@ class CQLLexer extends StreamTokenizer {
 	ordinaryChar('(');
 	ordinaryChar(')');
 	wordChars('\'', '\'');	// prevent this from introducing strings
+	parseNumbers();
 	DEBUG = lexdebug;
     }
 
@@ -113,23 +171,11 @@ class CQLLexer extends StreamTokenizer {
     //
     public int underlyingNextToken() throws java.io.IOException {
 	super.nextToken();
-	if (ttype == TT_WORD) {
-	    if (sval.equalsIgnoreCase("and")) {
-		ttype = TT_AND;
-	    } else if (sval.equalsIgnoreCase("or")) {
-		ttype = TT_OR;
-	    } else if (sval.equalsIgnoreCase("not")) {
-		ttype = TT_NOT;
-	    } else if (sval.equalsIgnoreCase("prox")) {
-		ttype = TT_PROX;
-	    } else if (sval.equalsIgnoreCase("any")) {
-		ttype = TT_ANY;
-	    } else if (sval.equalsIgnoreCase("all")) {
-		ttype = TT_ALL;
-	    } else if (sval.equalsIgnoreCase("exact")) {
-		ttype = TT_EXACT;
-	    }
-	}
+	if (ttype == TT_WORD)
+	    for (int i = 0; i < keywords.length; i++)
+		if (sval.equalsIgnoreCase(keywords[i].keyword))
+		    ttype = keywords[i].token;
+
 	return ttype;
     }
 
@@ -142,7 +188,7 @@ class CQLLexer extends StreamTokenizer {
 	if (token == TT_EOF) {
 	    return "EOF";
 	} else if (token == TT_NUMBER) {
-	    return "number: " + nval;
+	    return new Integer((int) nval).toString();
 	} else if (token == TT_WORD) {
 	    return "word: " + sval;
 	} else if (token == '"') {
@@ -153,31 +199,43 @@ class CQLLexer extends StreamTokenizer {
 	    return ">=";
 	} else if (token == TT_NE) {
 	    return "<>";
-	} else if (token == TT_AND) {
-	    return "and";
-	} else if (token == TT_OR) {
-	    return "or";
-	} else if (token == TT_NOT) {
-	    return "not";
-	} else if (token == TT_PROX) {
-	    return "prox";
-	} else if (token == TT_ANY) {
-	    return "any";
-	} else if (token == TT_ALL) {
-	    return "all";
-	} else if (token == TT_EXACT) {
-	    return "exact";
 	}
+	// Check whether it's associated with one of the keywords
+	for (int i = 0; i < keywords.length; i++)
+	    if (token == keywords[i].token)
+		return keywords[i].keyword;
+
+	// Otherwise it must be a single character, such as '(' or '/'.
 	String res = String.valueOf((char) token);
 	if (quoteChars) res = "'" + res + "'";
 	return res;
     }
 
     public static void main(String[] args) throws Exception {
-	CQLLexer lexer = new CQLLexer(args[0], true);
-	int token;
+	if (args.length > 1) {
+	    System.err.println("Usage: CQLLexer [<CQL-query>]");
+	    System.err.println("If unspecified, query is read from stdin");
+	    System.exit(1);
+	}
+	String cql;
+	if (args.length == 1) {
+	    cql = args[0];
+	} else {
+	    byte[] bytes = new byte[10000];
+	    try {
+		// Read in the whole of standard input in one go
+		int nbytes = System.in.read(bytes);
+	    } catch (java.io.IOException ex) {
+		System.err.println("Can't read query: " + ex.getMessage());
+		System.exit(2);
+	    }
+	    cql = new String(bytes);
+	}
+
+	CQLLexer lexer = new CQLLexer(cql, true);
+	int token;
 	while ((token = lexer.nextToken()) != TT_EOF) {
 	    // Nothing to do: debug() statements render tokens for us
 	}
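
A note on the new main() driver: a single System.in.read(bytes) is not guaranteed to return everything waiting on standard input, and new String(bytes) converts the whole 10,000-byte buffer regardless of how many bytes were actually read. A minimal sketch of a reader that loops to end-of-file and converts only the bytes received (the StdinQuery class and readQueryFromStdin name are hypothetical helpers invented here, not part of this commit):

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    class StdinQuery {
        // Sketch only: accumulate standard input until EOF and build the
        // query string from exactly the bytes that arrived.
        static String readQueryFromStdin() throws IOException {
            ByteArrayOutputStream buf = new ByteArrayOutputStream();
            byte[] chunk = new byte[4096];
            int n;
            while ((n = System.in.read(chunk)) != -1)
                buf.write(chunk, 0, n);   // copy only the bytes read
            return buf.toString();        // platform default encoding, as in the diff
        }
    }

main() could then do something like cql = StdinQuery.readQueryFromStdin(); when no argument is supplied.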
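
And a note on the keyword table: the comment near the top of the new code says it would be nice to compile the keyword list into a Hashtable, but that ints are awkward as hash values. They can, however, be wrapped in Integer objects. A minimal sketch of that approach, in the pre-generics style of the rest of the file (the KeywordTable class and its method names are invented for illustration, not part of this commit):

    import java.util.Hashtable;

    // Sketch only: maps lower-cased keywords to their token types by
    // wrapping the int token values in Integer objects.
    class KeywordTable {
        private Hashtable table = new Hashtable();   // String -> Integer

        void add(String keyword, int token) {
            table.put(keyword, new Integer(token));
        }

        // Returns the token type for `word', or defaultType if the
        // word is not a keyword.
        int lookup(String word, int defaultType) {
            Integer token = (Integer) table.get(word.toLowerCase());
            return (token == null) ? defaultType : token.intValue();
        }
    }

With a table like this populated from the keywords array, the linear scan in underlyingNextToken() could collapse to a single lookup, e.g. ttype = table.lookup(sval, TT_WORD) when ttype == TT_WORD; for a list this short, though, the scan is just as fast in practice.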