Add first chunk of work towards generic test harness.
-$Id: README,v 1.7 2002-10-31 22:22:01 mike Exp $
+$Id: README,v 1.8 2002-11-01 23:45:28 mike Exp $
cql-java -- a free CQL compiler for Java
restrictive) until I say otherwise.
+TESTING
+-------
+
+Ways of testing the parser and other components include:
+
+* Generate a random tree with CQLGenerate, take a copy, and
+ canonicalise it with CQLParser -c. Since the CQLGenerate output is
+ in canonical form anyway, the before-and-after versions should be
+ identical.
+
+* ... others :-)
+
+
SEE ALSO
--------
TO DO
-----
-* Add proximity support to parser -- just the back-ends left to do.
-
-* Relation modifiers could be limited to known modifiers only.
-
-* Fix CQLParser and CQLLexer shell-script front-ends to elegantly
- handle their classes' test harnesses' ability to read the query from
- the command-line arguments, if any, falling back to stdin if there
- are none.
-
-* Add CQLGenerate shell-script. Allow CQLGenerate test-harness to
- take some arguments on command-line as well as or instead of a
- file.
-
-* Trivial CQLCanonicalise application, which renders out its source
- tree in a canonical form, enabling queries to be diffed for
- semantically significant differences only. Tests can be run by
- generating random trees, canonicalising them, then canonicalising
- them _again_ and checking that the before-and-after results are the
- same.
+* Allow CQLGenerate test-harness to take some of its configuration
+ parameters on the command-line as well as or instead of a file.
* Some niceties for the cql-decompiling back-end:
* don't emit redundant parentheses.
attributes.)
* Consider the utility of yet another back-end that translates a
- cqlnode tree into a type-1 query tree using the jzkit data
+ CQLNode tree into a Type-1 query tree using the JZKit data
structures. That would be nice so that CQL could become a JZKit
- query-type, but you could achieve the same effect by generating PQN,
+ query-type; but you could achieve the same effect by generating PQN,
and running that through JZKit's existing PQN-to-Type-1 compiler.
* Refinements to random query generator:
#!/bin/sh
-# $Id: CQLLexer,v 1.1 2002-10-31 22:22:01 mike Exp $
+# $Id: CQLLexer,v 1.2 2002-11-01 23:45:28 mike Exp $
# Trivial script to invoke the CQLLexer test-harness
-cat | java org.z3950.zing.cql.CQLLexer ${@+"$@"}
+java org.z3950.zing.cql.CQLLexer ${@+"$@"}
#!/bin/sh
-# $Id: CQLParser,v 1.1 2002-10-31 22:22:01 mike Exp $
+# $Id: CQLParser,v 1.2 2002-11-01 23:45:28 mike Exp $
# Trivial script to invoke the CQLParser test-harness
-cat | java org.z3950.zing.cql.CQLParser ${@+"$@"}
+java org.z3950.zing.cql.CQLParser ${@+"$@"}
-// $Id: CQLLexer.java,v 1.2 2002-10-31 22:22:01 mike Exp $
+// $Id: CQLLexer.java,v 1.3 2002-11-01 23:45:28 mike Exp $
package org.z3950.zing.cql;
import java.io.StreamTokenizer;
static int TT_ANY = 1007; // The "any" relation
static int TT_ALL = 1008; // The "all" relation
static int TT_EXACT = 1009; // The "exact" relation
- static int TT_pWORD = 1010; // The "word" proximity unit
+ static int TT_pWORD = 1010; // The "word" proximity unit
static int TT_SENTENCE = 1011; // The "sentence" proximity unit
static int TT_PARAGRAPH = 1012; // The "paragraph" proximity unit
static int TT_ELEMENT = 1013; // The "element" proximity unit
static int TT_ORDERED = 1014; // The "ordered" proximity ordering
static int TT_UNORDERED = 1015; // The "unordered" proximity ordering
+ static int TT_RELEVANT = 1016; // The "relevant" relation modifier
+ static int TT_FUZZY = 1017; // The "fuzzy" relation modifier
+ static int TT_STEM = 1018; // The "stem" relation modifier
// Support for keywords. It would be nice to compile this linear
// list into a Hashtable, but it's hard to store ints as hash
new Keyword(TT_ELEMENT, "element"),
new Keyword(TT_ORDERED, "ordered"),
new Keyword(TT_UNORDERED, "unordered"),
+ new Keyword(TT_RELEVANT, "relevant"),
+ new Keyword(TT_FUZZY, "fuzzy"),
+ new Keyword(TT_STEM, "stem"),
};
// For halfDecentPushBack() and the code at the top of nextToken()
-// $Id: CQLParser.java,v 1.11 2002-10-31 22:22:01 mike Exp $
+// $Id: CQLParser.java,v 1.12 2002-11-01 23:45:28 mike Exp $
package org.z3950.zing.cql;
import java.io.IOException;
+import java.util.Vector;
/**
* Compiles a CQL string into a parse tree.
* ##
*
- * @version $Id: CQLParser.java,v 1.11 2002-10-31 22:22:01 mike Exp $
+ * @version $Id: CQLParser.java,v 1.12 2002-11-01 23:45:28 mike Exp $
* @see <A href="http://zing.z3950.org/cql/index.html"
* >http://zing.z3950.org/cql/index.html</A>
*/
while (lexer.ttype == '/') {
match('/');
- // ### could insist on known modifiers only
- if (lexer.ttype != lexer.TT_WORD)
+ if (lexer.ttype != lexer.TT_RELEVANT &&
+ lexer.ttype != lexer.TT_FUZZY &&
+ lexer.ttype != lexer.TT_STEM)
throw new CQLParseException("expected relation modifier, "
+ "got " + lexer.render());
relation.addModifier(lexer.sval);
- match(lexer.TT_WORD);
+ match(lexer.ttype);
}
debug("qualifier='" + qualifier + ", " +
// </triple>
//
public static void main (String[] args) {
- if (args.length > 1) {
- System.err.println("Usage: CQLParser [<CQL-query>]");
+ boolean canonicalise = false;
+ Vector argv = new Vector();
+ for (int i = 0; i < args.length; i++) {
+ argv.add(args[i]);
+ }
+
+ if (argv.size() > 0 && argv.get(0).equals("-c")) {
+ canonicalise = true;
+ argv.remove(0);
+ }
+
+ if (argv.size() > 1) {
+ System.err.println("Usage: CQLParser [-c] [<CQL-query>]");
System.err.println("If unspecified, query is read from stdin");
System.exit(1);
}
String cql;
- if (args.length == 1) {
- cql = args[0];
+ if (argv.size() == 1) {
+ cql = (String) argv.get(0);
} else {
byte[] bytes = new byte[10000];
try {
try {
root = parser.parse(cql);
debug("root='" + root + "'");
- System.out.println(root.toCQL());
+ if (canonicalise) {
+ System.out.println(root.toCQL());
+ } else {
+ System.out.println(root.toXCQL(0));
+ }
} catch (CQLParseException ex) {
System.err.println("Syntax error: " + ex.getMessage());
System.exit(3);
--- /dev/null
+# $Id: Makefile,v 1.1 2002-11-01 23:45:28 mike Exp $
+
+# Build the XCQL answer files by running mkanswers with a trusted CQL compiler.
+# NOTE(review): the first answer file presumably stands in for the whole set -- confirm.
+sections/01/01.xcql: sections
+# ./mkanswers ../../rob/CQLParser.py
+ ./mkanswers ../../srw/cql/cqlparse3
+
+# Recreate the "sections" tree from scratch by running mktests over "raw".
+sections: mktests raw
+ rm -rf sections
+ ./mktests raw
+
+# Remove the generated *.xcql files found under "sections".
+clean:
+ find sections -name '*.xcql' -print | xargs rm -f
+
+# Remove the whole "sections" tree.
+distclean:
+ rm -rf sections
+
--- /dev/null
+$Id: README,v 1.1 2002-11-01 23:45:28 mike Exp $
+
+"raw" is the file of test queries as provided by Rob.
+"mktests" parses the raw file into sections and individual queries
+"sections" is the top-level directory created by that program.
+ "01", "02" etc. represent the sections within the raw file
+ "01/name", "02/name", etc. contain the names of the sections.
+ "01/01.cql", "01/02.cql" etc. are the CQL queries themselves.
+"mkanswers" uses a trusted CQL compiler to generate corresponding XCQL.
+ "01/01.xcql", "01/02.xcql" etc. are the compiled XCQL queries.
+"Makefile" controls the building of all this.
--- /dev/null
+#!/usr/bin/perl -w
+
+use IO::File;
+use strict;
+
+# Usage: mkanswers <trusted-CQL-compiler>
+#
+# For every query file sections/NN/MM.cql, pipe the query text into the
+# trusted compiler and capture its standard output in sections/NN/MM.xcql.
+if (@ARGV == 0) {
+    print STDERR "Usage: $0 <trusted-CQL-compiler>\n";
+    exit(1);
+}
+my $compiler = $ARGV[0];
+
+foreach my $sdir (glob("sections/*")) {
+    # Section label is the directory name without the "sections/" prefix.
+    (my $section = $sdir) =~ s@sections/@@;
+    print "answering section $section - ", read_file("$sdir/name"), "\n";
+
+    # Only *.cql files are queries.  Restricting the glob skips the "name"
+    # file and, on a re-run, any answer files generated earlier (which
+    # would otherwise be fed back to the compiler and overwritten).
+    foreach my $qfile (glob("$sdir/*.cql")) {
+        (my $label = $qfile) =~ s@sections/([0-9]+/.*)\.cql@$1@;
+        my $query = read_file($qfile);
+
+        # Answer file sits next to the query, with the ".xcql" extension.
+        (my $afile = $qfile) =~ s/\.cql$/.xcql/;
+        print " query $label - $query\n";
+
+        # The compiler reads the query on stdin; the shell redirects its
+        # stdout into the answer file.
+        my $fh = new IO::File("| $compiler > $afile")
+            or die "can't run compiler '$compiler': $!";
+        print $fh $query;
+        $fh->close();
+    }
+}
+
+# Return the entire contents of the named file as a single string.
+# Dies if the file cannot be opened for reading.
+sub read_file {
+    my $path = shift;
+
+    my $in = IO::File->new("<$path");
+    die "can't read '$path': $!" unless $in;
+    my @lines = $in->getlines();
+    $in->close();
+    return join('', @lines);
+}
+
+# Create (or truncate) the named file and write the given text to it.
+# Dies if the file cannot be opened for writing.
+sub write_file {
+    my $path = shift;
+    my $text = shift;
+
+    my $out = IO::File->new(">$path");
+    die "can't create '$path': $!" unless $out;
+    print $out $text;
+    $out->close();
+}
--- /dev/null
+#!/usr/bin/perl -w
+
+use IO::File;
+use strict;
+
+# Split the raw query file (named on the command line, or stdin) into
+# sections/NN/name and sections/NN/MM.cql files.  A line starting with "#"
+# opens a new section; every other non-blank line is one query.
+mkdir "sections";
+my $section = 0;        # number of the current section (1-based)
+my $dir;                # directory of the current section; undef before the first header
+my $query;              # number of the current query within the section
+
+while (<>) {
+    chomp();
+    s/[ \t]+$//;        # drop trailing blanks
+    next if /^$/;       # ignore empty lines
+
+    if (s/^#[ \t]*//) {
+        # Section header: what remains in $_ is the section name.
+        $section++;
+        $query = 0;
+        # Zero-pad to at least two digits.  Unlike taking the last two
+        # characters, this does not wrap section 101 back onto "01".
+        $dir = "sections/" . sprintf("%02d", $section);
+        mkdir $dir;
+        write_file("$dir/name", $_);
+        print "created section $section ($dir) - $_\n";
+        next;
+    }
+
+    die "query before first section header"
+        if !defined $dir;
+
+    $query++;
+    # Same padding rule as for sections, so query 101 cannot overwrite 01.cql.
+    my $filename = $dir . "/" . sprintf("%02d", $query) . ".cql";
+    write_file($filename, $_);
+    print " added query $query ($filename) - $_\n";
+}
+
+# Create (or truncate) the named file and write the given text to it.
+# Dies if the file cannot be opened for writing.
+sub write_file {
+    my $path = shift;
+    my $text = shift;
+
+    my $out = IO::File->new(">$path");
+    die "can't create '$path': $!" unless $out;
+    print $out $text;
+    $out->close();
+}
--- /dev/null
+# Simple
+
+cat
+"cat"
+comp.os.linux
+xml:element
+"<xml:element>"
+"="
+"prox/word/>=/5"
+("cat")
+((dog))
+
+# index relation term
+
+title = "fish"
+title exact fish
+title any fish
+title all fish
+title > 9
+title >= 23
+dc.title any "fish chips"
+dc.title any/stem fish
+dc.fish all/stem/fuzzy "fish chips"
+(title any frog)
+((dc.title any/stem "frog pond"))
+
+# Simple Boolean
+
+cat or dog
+cat and fish
+cat not frog
+(cat not frog)
+"cat" not "fish food"
+xml and "prox/word/"
+fred and any
+((fred or all))
+a or b and c not d
+
+# I/R/T plus Boolean
+
+bath.author any fish and dc.title all "cat dog"
+(title any/stem "fish dog" or and)
+
+# Prox
+
+cat prox hat
+cat prox/word/=/3/ordered hat
+cat prox///3 hat
+"fish food" prox/sentence and
+title all "chips frog" prox/word//5 any
+(dc.author exact "jones" prox///5 title >= "smith")
+((cat prox hat))
+
+# Special characters
+(cat^)
+"cat"
+"^cat says \"fish\""
+"cat*fish"
+cat?dog
+(("^cat*fishdog\"horse?"))
+
+# Nesting Parens
+
+(((cat or dog) or horse) and frog)
+(cat and dog) or (horse and frog)
+(cat and (horse or frog)) and chips
+
+# Lame searches
+
+any or all:stem and all exact any prox/word prox=fuzzy
+(((((((((any)))))))))
+
+
+# Invalid searches [should error]
+
+^
+>
+===
+cat or
+index any
+index any/wrong term
+a prox/wrong b
+()
+(a
+index any fish)
+(cat any dog or ())
+sorry = (mike)