diff --git a/src/org/z3950/zing/cql/CQLLexer.java b/src/org/z3950/zing/cql/CQLLexer.java
index 1dc580e..2cb0369 100644
--- a/src/org/z3950/zing/cql/CQLLexer.java
+++ b/src/org/z3950/zing/cql/CQLLexer.java
@@ -1,35 +1,92 @@
-// $Id: CQLLexer.java,v 1.1 2002-10-30 09:19:26 mike Exp $
+// $Id: CQLLexer.java,v 1.6 2002-11-17 23:29:02 mike Exp $
 
 package org.z3950.zing.cql;
 import java.io.StreamTokenizer;
 import java.io.StringReader;
+import java.util.Hashtable;
 
-// This is a trivial subclass for java.io.StreamTokenizer which knows
-// about the multi-character tokens "<=", ">=" and "<>", and includes
-// a render() method.  Used only by CQLParser.
+// This is a semi-trivial subclass of java.io.StreamTokenizer that:
+//	* Has a halfDecentPushBack() method that actually works
+//	* Includes a render() method
+//	* Knows about the multi-character tokens "<=", ">=" and "<>"
+//	* Recognises a set of keywords as tokens in their own right
+//	* Includes some primitive debugging-output facilities
+// It's used only by CQLParser.
 //
 class CQLLexer extends StreamTokenizer {
-    private static boolean DEBUG;
-    static int TT_LE = 1000;        // The "<=" relation
-    static int TT_GE = 1001;        // The ">=" relation
-    static int TT_NE = 1002;        // The "<>" relation
-    static int TT_AND = 1003;       // The "and" boolean
-    static int TT_OR = 1004;        // The "or" boolean
-    static int TT_NOT = 1005;       // The "not" boolean
-    static int TT_PROX = 1006;      // The "prox" boolean
-    static int TT_ANY = 1007;       // The "any" relation
-    static int TT_ALL = 1008;       // The "all" relation
-    static int TT_EXACT = 1009;     // The "exact" relation
+    // New publicly visible token-types
+    static int TT_LE = 1000;        // The "<=" relation
+    static int TT_GE = 1001;        // The ">=" relation
+    static int TT_NE = 1002;        // The "<>" relation
+    static int TT_AND = 1003;       // The "and" boolean
+    static int TT_OR = 1004;        // The "or" boolean
+    static int TT_NOT = 1005;       // The "not" boolean
+    static int TT_PROX = 1006;      // The "prox" boolean
+    static int TT_ANY = 1007;       // The "any" relation
+    static int TT_ALL = 1008;       // The "all" relation
+    static int TT_EXACT = 1009;     // The "exact" relation
+    static int TT_pWORD = 1010;     // The "word" proximity unit
+    static int TT_SENTENCE = 1011;  // The "sentence" proximity unit
+    static int TT_PARAGRAPH = 1012; // The "paragraph" proximity unit
+    static int TT_ELEMENT = 1013;   // The "element" proximity unit
+    static int TT_ORDERED = 1014;   // The "ordered" proximity ordering
+    static int TT_UNORDERED = 1015; // The "unordered" proximity ordering
+    static int TT_RELEVANT = 1016;  // The "relevant" relation modifier
+    static int TT_FUZZY = 1017;     // The "fuzzy" relation modifier
+    static int TT_STEM = 1018;      // The "stem" relation modifier
+    static int TT_SCR = 1019;       // The server choice relation
+    static int TT_PHONETIC = 1020;  // The "phonetic" relation modifier
+
+    // Support for keywords.  It would be nice to compile this linear
+    // list into a Hashtable, but it's hard to store ints as hash
+    // values, and next to impossible to use them as hash keys.  So
+    // we'll just scan the (very short) list every time we need to do
+    // a lookup.
+    private class Keyword {
+        int token;
+        String keyword;
+        Keyword(int token, String keyword) {
+            this.token = token;
+            this.keyword = keyword;
+        }
+    }
+    // This should logically be static, but Java won't allow it :-P
+    private Keyword[] keywords = {
+        new Keyword(TT_AND, "and"),
+        new Keyword(TT_OR, "or"),
+        new Keyword(TT_NOT, "not"),
+        new Keyword(TT_PROX, "prox"),
+        new Keyword(TT_ANY, "any"),
+        new Keyword(TT_ALL, "all"),
+        new Keyword(TT_EXACT, "exact"),
+        new Keyword(TT_pWORD, "word"),
+        new Keyword(TT_SENTENCE, "sentence"),
+        new Keyword(TT_PARAGRAPH, "paragraph"),
+        new Keyword(TT_ELEMENT, "element"),
+        new Keyword(TT_ORDERED, "ordered"),
+        new Keyword(TT_UNORDERED, "unordered"),
+        new Keyword(TT_RELEVANT, "relevant"),
+        new Keyword(TT_FUZZY, "fuzzy"),
+        new Keyword(TT_STEM, "stem"),
+        new Keyword(TT_SCR, "scr"),
+        new Keyword(TT_PHONETIC, "phonetic"),
+    };
 
     // For halfDecentPushBack() and the code at the top of nextToken()
     private static int TT_UNDEFINED = -1000;
-    int saved_ttype = TT_UNDEFINED;
-    double saved_nval;
-    String saved_sval;
+    private int saved_ttype = TT_UNDEFINED;
+    private double saved_nval;
+    private String saved_sval;
+
+    // Controls debugging output
+    private static boolean DEBUG;
 
     CQLLexer(String cql, boolean lexdebug) {
 	super(new StringReader(cql));
+	wordChars('!', '?');	// ASCII-dependency!
+	wordChars('[', '`');	// ASCII-dependency!
+	quoteChar('"');
 	ordinaryChar('=');
 	ordinaryChar('<');
 	ordinaryChar('>');
@@ -37,6 +94,7 @@ class CQLLexer extends StreamTokenizer {
 	ordinaryChar('(');
 	ordinaryChar(')');
 	wordChars('\'', '\'');	// prevent this from introducing strings
+	parseNumbers();
 	DEBUG = lexdebug;
     }
 
@@ -113,23 +171,11 @@ class CQLLexer extends StreamTokenizer {
     //
     public int underlyingNextToken() throws java.io.IOException {
 	super.nextToken();
-	if (ttype == TT_WORD) {
-	    if (sval.equalsIgnoreCase("and")) {
-		ttype = TT_AND;
-	    } else if (sval.equalsIgnoreCase("or")) {
-		ttype = TT_OR;
-	    } else if (sval.equalsIgnoreCase("not")) {
-		ttype = TT_NOT;
-	    } else if (sval.equalsIgnoreCase("prox")) {
-		ttype = TT_PROX;
-	    } else if (sval.equalsIgnoreCase("any")) {
-		ttype = TT_ANY;
-	    } else if (sval.equalsIgnoreCase("all")) {
-		ttype = TT_ALL;
-	    } else if (sval.equalsIgnoreCase("exact")) {
-		ttype = TT_EXACT;
-	    }
-	}
+	if (ttype == TT_WORD)
+	    for (int i = 0; i < keywords.length; i++)
+		if (sval.equalsIgnoreCase(keywords[i].keyword))
+		    ttype = keywords[i].token;
+
 	return ttype;
     }
 
@@ -142,7 +188,7 @@ class CQLLexer extends StreamTokenizer {
 	if (token == TT_EOF) {
 	    return "EOF";
 	} else if (token == TT_NUMBER) {
-	    return "number: " + nval;
+	    return new Integer((int) nval).toString();
 	} else if (token == TT_WORD) {
 	    return "word: " + sval;
 	} else if (token == '"') {
@@ -153,31 +199,43 @@ class CQLLexer extends StreamTokenizer {
 	    return ">=";
 	} else if (token == TT_NE) {
 	    return "<>";
-	} else if (token == TT_AND) {
-	    return "and";
-	} else if (token == TT_OR) {
-	    return "or";
-	} else if (token == TT_NOT) {
-	    return "not";
-	} else if (token == TT_PROX) {
-	    return "prox";
-	} else if (token == TT_ANY) {
-	    return "any";
-	} else if (token == TT_ALL) {
-	    return "all";
-	} else if (token == TT_EXACT) {
-	    return "exact";
 	}
+	// Check whether it's associated with one of the keywords
+	for (int i = 0; i < keywords.length; i++)
+	    if (token == keywords[i].token)
+		return keywords[i].keyword;
+
+	// Otherwise it must be a single character, such as '(' or '/'.
 	String res = String.valueOf((char) token);
 	if (quoteChars) res = "'" + res + "'";
 	return res;
     }
 
     public static void main(String[] args) throws Exception {
-	CQLLexer lexer = new CQLLexer(args[0], true);
-	int token;
+	if (args.length > 1) {
+	    System.err.println("Usage: CQLLexer [<CQL-query>]");
+	    System.err.println("If unspecified, query is read from stdin");
+	    System.exit(1);
+	}
+	String cql;
+	if (args.length == 1) {
+	    cql = args[0];
+	} else {
+	    byte[] bytes = new byte[10000];
+	    try {
+		// Read in the whole of standard input in one go
+		int nbytes = System.in.read(bytes);
+	    } catch (java.io.IOException ex) {
+		System.err.println("Can't read query: " + ex.getMessage());
+		System.exit(2);
+	    }
+	    cql = new String(bytes);
+	}
+
+	CQLLexer lexer = new CQLLexer(cql, true);
+	int token;
 	while ((token = lexer.nextToken()) != TT_EOF) {
 	    // Nothing to do: debug() statements render tokens for us
 	}
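
A note on the new main() driver: a single System.in.read(bytes) is not guaranteed to return everything waiting on standard input, and new String(bytes) converts the whole 10,000-byte buffer regardless of how many bytes were actually read. A minimal sketch of a reader that loops to end-of-file and converts only the bytes received (the StdinQuery class and readQueryFromStdin name are hypothetical helpers invented here, not part of this commit):

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    class StdinQuery {
        // Sketch only: accumulate standard input until EOF and build the
        // query string from exactly the bytes that arrived.
        static String readQueryFromStdin() throws IOException {
            ByteArrayOutputStream buf = new ByteArrayOutputStream();
            byte[] chunk = new byte[4096];
            int n;
            while ((n = System.in.read(chunk)) != -1)
                buf.write(chunk, 0, n);   // copy only the bytes read
            return buf.toString();        // platform default encoding, as in the diff
        }
    }

main() could then do something like cql = StdinQuery.readQueryFromStdin(); when no argument is supplied.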
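
And a note on the keyword table: the comment near the top of the new code says it would be nice to compile the keyword list into a Hashtable, but that ints are awkward as hash values. They can, however, be wrapped in Integer objects. A minimal sketch of that approach, in the pre-generics style of the rest of the file (the KeywordTable class and its method names are invented for illustration, not part of this commit):

    import java.util.Hashtable;

    // Sketch only: maps lower-cased keywords to their token types by
    // wrapping the int token values in Integer objects.
    class KeywordTable {
        private Hashtable table = new Hashtable();   // String -> Integer

        void add(String keyword, int token) {
            table.put(keyword, new Integer(token));
        }

        // Returns the token type for `word', or defaultType if the
        // word is not a keyword.
        int lookup(String word, int defaultType) {
            Integer token = (Integer) table.get(word.toLowerCase());
            return (token == null) ? defaultType : token.intValue();
        }
    }

With a table like this populated from the keywords array, the linear scan in underlyingNextToken() could collapse to a single lookup, e.g. ttype = table.lookup(sval, TT_WORD) when ttype == TT_WORD; for a list this short, though, the scan is just as fast in practice.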