From cae635a47469f62058626e9b8231b231d0861fb0 Mon Sep 17 00:00:00 2001 From: mike Date: Fri, 1 Nov 2002 23:45:28 +0000 Subject: [PATCH] Improve test-harnesses and their associated scripts. Add first chunk of work towards generic test harness. --- README | 40 +++++++-------- bin/CQLLexer | 4 +- bin/CQLParser | 4 +- src/org/z3950/zing/cql/CQLLexer.java | 10 +++- src/org/z3950/zing/cql/CQLParser.java | 37 ++++++++++---- test/Makefile | 16 ++++++ test/README | 11 +++++ test/mkanswers | 49 +++++++++++++++++++ test/mktests | 42 ++++++++++++++++ test/raw | 87 +++++++++++++++++++++++++++++++++ 10 files changed, 262 insertions(+), 38 deletions(-) create mode 100644 test/Makefile create mode 100644 test/README create mode 100755 test/mkanswers create mode 100755 test/mktests create mode 100644 test/raw diff --git a/README b/README index ed5afc8..1cd2bd9 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ -$Id: README,v 1.7 2002-10-31 22:22:01 mike Exp $ +$Id: README,v 1.8 2002-11-01 23:45:28 mike Exp $ cql-java -- a free CQL compiler for Java @@ -72,6 +72,19 @@ licence to use. Be good. Assume I'm going with the GPL (most restrictive) until I say otherwise. +TESTING +------- + +Ways of testing the parser and other components include: + +* Generate a random tree with CQLGenerate, take a copy, and + canonicalise it with CQLParser -c. Since the CQLGenerate output is + in canonical form anyway, the before-and-after versions should be + identical. + +* ... others :-) + + SEE ALSO -------- @@ -83,25 +96,8 @@ All the other free CQL compilers everyone's going to write :-) TO DO ----- -* Add proximity support to parser -- just the back-ends left to do. - -* Relation modifiers could be limited to known modifiers only. - -* Fix CQLParser and CQLLexer shell-script front-ends to elegantly - handle their classes' test harnesses' ability to read the query from - the command-line arguments, if any, falling back to stdin if there - are none. - -* Add CQLGenerate shell-script.
Allow CQLGenerate test-harness to - take some arguments on command-line as well as or instead of a - file. - -* Trivial CQLCanonicalise application, which renders out its source - tree in a canonical form, enabling queries to be diffed for - semantically significant differences only. Tests can be run by - generating random trees, canonicalising them, then canonicalising - them _again_ and checking that the before-and-after results are the - same. +* Allow CQLGenerate test-harness to take some of its configuration + parameters on the command-line as well as or instead of a file. * Some niceties for the cql-decompiling back-end: * don't emit redundant parentheses. @@ -113,9 +109,9 @@ TO DO attributes.) * Consider the utility of yet another back-end that translates a - cqlnode tree into a type-1 query tree using the jzkit data + CQLNode tree into a Type-1 query tree using the JZKit data structures. That would be nice so that CQL could become a JZKit - query-type, but you could achieve the same effect by generating PQN, + query-type; but you could achieve the same effect by generating PQN, and running that through JZKit's existing PQN-to-Type-1 compiler. 
* Refinements to random query generator: diff --git a/bin/CQLLexer b/bin/CQLLexer index c7a5822..4967738 100755 --- a/bin/CQLLexer +++ b/bin/CQLLexer @@ -1,6 +1,6 @@ #!/bin/sh -# $Id: CQLLexer,v 1.1 2002-10-31 22:22:01 mike Exp $ +# $Id: CQLLexer,v 1.2 2002-11-01 23:45:28 mike Exp $ # Trivial script to invoke the CQLLexer test-harness -cat | java org.z3950.zing.cql.CQLLexer ${@+"$@"} +java org.z3950.zing.cql.CQLLexer ${@+"$@"} diff --git a/bin/CQLParser b/bin/CQLParser index 9576625..3147d85 100755 --- a/bin/CQLParser +++ b/bin/CQLParser @@ -1,6 +1,6 @@ #!/bin/sh -# $Id: CQLParser,v 1.1 2002-10-31 22:22:01 mike Exp $ +# $Id: CQLParser,v 1.2 2002-11-01 23:45:28 mike Exp $ # Trivial script to invoke the CQLParser test-harness -cat | java org.z3950.zing.cql.CQLParser ${@+"$@"} +java org.z3950.zing.cql.CQLParser ${@+"$@"} diff --git a/src/org/z3950/zing/cql/CQLLexer.java b/src/org/z3950/zing/cql/CQLLexer.java index 52b81f7..ce38eb6 100644 --- a/src/org/z3950/zing/cql/CQLLexer.java +++ b/src/org/z3950/zing/cql/CQLLexer.java @@ -1,4 +1,4 @@ -// $Id: CQLLexer.java,v 1.2 2002-10-31 22:22:01 mike Exp $ +// $Id: CQLLexer.java,v 1.3 2002-11-01 23:45:28 mike Exp $ package org.z3950.zing.cql; import java.io.StreamTokenizer; @@ -26,12 +26,15 @@ class CQLLexer extends StreamTokenizer { static int TT_ANY = 1007; // The "any" relation static int TT_ALL = 1008; // The "all" relation static int TT_EXACT = 1009; // The "exact" relation - static int TT_pWORD = 1010; // The "word" proximity unit + static int TT_pWORD = 1010; // The "word" proximity unit static int TT_SENTENCE = 1011; // The "sentence" proximity unit static int TT_PARAGRAPH = 1012; // The "paragraph" proximity unit static int TT_ELEMENT = 1013; // The "element" proximity unit static int TT_ORDERED = 1014; // The "ordered" proximity ordering static int TT_UNORDERED = 1015; // The "unordered" proximity ordering + static int TT_RELEVANT = 1016; // The "relevant" relation modifier + static int TT_FUZZY = 1017; // The "fuzzy" 
relation modifier + static int TT_STEM = 1018; // The "stem" relation modifier // Support for keywords. It would be nice to compile this linear // list into a Hashtable, but it's hard to store ints as hash @@ -61,6 +64,9 @@ class CQLLexer extends StreamTokenizer { new Keyword(TT_ELEMENT, "element"), new Keyword(TT_ORDERED, "ordered"), new Keyword(TT_UNORDERED, "unordered"), + new Keyword(TT_RELEVANT, "relevant"), + new Keyword(TT_FUZZY, "fuzzy"), + new Keyword(TT_STEM, "stem"), }; // For halfDecentPushBack() and the code at the top of nextToken() diff --git a/src/org/z3950/zing/cql/CQLParser.java b/src/org/z3950/zing/cql/CQLParser.java index 0aded47..2792aaa 100644 --- a/src/org/z3950/zing/cql/CQLParser.java +++ b/src/org/z3950/zing/cql/CQLParser.java @@ -1,14 +1,15 @@ -// $Id: CQLParser.java,v 1.11 2002-10-31 22:22:01 mike Exp $ +// $Id: CQLParser.java,v 1.12 2002-11-01 23:45:28 mike Exp $ package org.z3950.zing.cql; import java.io.IOException; +import java.util.Vector; /** * Compiles a CQL string into a parse tree. 
* ## * - * @version $Id: CQLParser.java,v 1.11 2002-10-31 22:22:01 mike Exp $ + * @version $Id: CQLParser.java,v 1.12 2002-11-01 23:45:28 mike Exp $ * @see http://zing.z3950.org/cql/index.html */ @@ -100,12 +101,13 @@ public class CQLParser { while (lexer.ttype == '/') { match('/'); - // ### could insist on known modifiers only - if (lexer.ttype != lexer.TT_WORD) + if (lexer.ttype != lexer.TT_RELEVANT && + lexer.ttype != lexer.TT_FUZZY && + lexer.ttype != lexer.TT_STEM) throw new CQLParseException("expected relation modifier, " + "got " + lexer.render()); relation.addModifier(lexer.sval); - match(lexer.TT_WORD); + match(lexer.ttype); } debug("qualifier='" + qualifier + ", " + @@ -241,15 +243,26 @@ public class CQLParser { // // public static void main (String[] args) { - if (args.length > 1) { - System.err.println("Usage: CQLParser [<query>]"); + boolean canonicalise = false; + Vector argv = new Vector(); + for (int i = 0; i < args.length; i++) { + argv.add(args[i]); + } + + if (argv.size() > 0 && argv.get(0).equals("-c")) { + canonicalise = true; + argv.remove(0); + } + + if (argv.size() > 1) { + System.err.println("Usage: CQLParser [-c] [<query>]"); System.err.println("If unspecified, query is read from stdin"); System.exit(1); } String cql; - if (args.length == 1) { - cql = args[0]; + if (argv.size() == 1) { + cql = (String) argv.get(0); } else { byte[] bytes = new byte[10000]; try { @@ -267,7 +280,11 @@ public class CQLParser { try { root = parser.parse(cql); debug("root='" + root + "'"); - System.out.println(root.toCQL()); + if (canonicalise) { + System.out.println(root.toCQL()); + } else { + System.out.println(root.toXCQL(0)); + } } catch (CQLParseException ex) { System.err.println("Syntax error: " + ex.getMessage()); System.exit(3); diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..95a9065 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,16 @@ +# $Id: Makefile,v 1.1 2002-11-01 23:45:28 mike Exp $ + +sections/01/01.xcql: sections +# ./mkanswers
../../rob/CQLParser.py + ./mkanswers ../../srw/cql/cqlparse3 + +sections: mktests raw + rm -rf sections + ./mktests raw + +clean: + find sections -name '*.xcql' -print | xargs rm -f + +distclean: + rm -rf sections + diff --git a/test/README b/test/README new file mode 100644 index 0000000..fff96be --- /dev/null +++ b/test/README @@ -0,0 +1,11 @@ +$Id: README,v 1.1 2002-11-01 23:45:28 mike Exp $ + +"raw" is the file of test queries as provided by Rob. +"mktests" parses the raw file into sections and individual queries. +"sections" is the top-level directory created by that program. + "01", "02" etc. represent the sections within the raw file + "01/name", "02/name", etc. contain the names of the sections. + "01/01.cql", "01/02.cql" etc. are the CQL queries themselves. +"mkanswers" uses a trusted CQL compiler to generate corresponding XCQL. + "01/01.xcql", "01/02.xcql" etc. are the compiled XCQL queries. +"Makefile" controls the building of all this. diff --git a/test/mkanswers b/test/mkanswers new file mode 100755 index 0000000..4343b40 --- /dev/null +++ b/test/mkanswers @@ -0,0 +1,49 @@ +#!/usr/bin/perl -w + +use IO::File; +use strict; + +if (@ARGV == 0) { + print STDERR "Usage: $0 <compiler>\n"; + exit(1); +} +my $compiler = $ARGV[0]; + +while (<sections/*>) { + my $sdir = $_; + s@sections/@@; + print "answering section $_ - ", read_file("$sdir/name"), "\n"; + + while (<$sdir/*>) { + next if /\/name$/; + my $qfile = $_; + s@sections/([0-9]+/.*)\.cql@$1@; + my $query = read_file($qfile); + my $afile = $qfile; + $afile =~ s/\.cql$/.xcql/; + print " query $_ - $query\n"; + my $fh = new IO::File("| $compiler > $afile") + or die "can't run compiler '$compiler': $!"; + print $fh $query; + $fh->close(); + } +} + +sub read_file { + my($name) = @_; + + my $fh = new IO::File("<$name") + or die "can't read '$name': $!"; + my $contents = join('', <$fh>); + $fh->close(); + return $contents; +} + +sub write_file { + my($name, $contents) = @_; + + my $fh = new IO::File(">$name") + or die "can't create
'$name': $!"; + $fh->print($contents); + $fh->close(); +} diff --git a/test/mktests b/test/mktests new file mode 100755 index 0000000..523bb0f --- /dev/null +++ b/test/mktests @@ -0,0 +1,42 @@ +#!/usr/bin/perl -w + +use IO::File; +use strict; + +mkdir "sections"; +my $section = 0; +my $dir; +my $query; + +while (<>) { + chomp(); + s/[ \t]+$//; + next if /^$/; + + if (s/^#[ \t]*//) { + $section++; + $query = 0; + $dir = "sections/" . substr("0$section", -2); + mkdir $dir; + write_file("$dir/name", $_); + print "created section $section ($dir) - $_\n"; + next; + } + + die "query before first section header" + if !defined $dir; + + $query++; + my $filename = $dir . "/" . substr("0$query", -2) . ".cql"; + write_file($filename, $_); + print " added query $query ($filename) - $_\n"; +} + +sub write_file { + my($name, $contents) = @_; + + my $fh = new IO::File(">$name") + or die "can't create '$name': $!"; + $fh->print($contents); + $fh->close(); +} diff --git a/test/raw b/test/raw new file mode 100644 index 0000000..847b027 --- /dev/null +++ b/test/raw @@ -0,0 +1,87 @@ +# Simple + +cat +"cat" +comp.os.linux +xml:element +"" +"=" +"prox/word/>=/5" +("cat") +((dog)) + +# index relation term + +title = "fish" +title exact fish +title any fish +title all fish +title > 9 +title >= 23 +dc.title any "fish chips" +dc.title any/stem fish +dc.fish all/stem/fuzzy "fish chips" +(title any frog) +((dc.title any/stem "frog pond")) + +# Simple Boolean + +cat or dog +cat and fish +cat not frog +(cat not frog) +"cat" not "fish food" +xml and "prox/word/" +fred and any +((fred or all)) +a or b and c not d + +# I/R/T plus Boolean + +bath.author any fish and dc.title all "cat dog" +(title any/stem "fish dog" or and) + +# Prox + +cat prox hat +cat prox/word/=/3/ordered hat +cat prox///3 hat +"fish food" prox/sentence and +title all "chips frog" prox/word//5 any +(dc.author exact "jones" prox///5 title >= "smith") +((cat prox hat)) + +# Special characters +(cat^) +"cat" +"^cat says \"fish\"" 
+"cat*fish" +cat?dog +(("^cat*fishdog\"horse?")) + +# Nesting Parens + +(((cat or dog) or horse) and frog) +(cat and dog) or (horse and frog) +(cat and (horse or frog)) and chips + +# Lame searches + +any or all:stem and all exact any prox/word prox=fuzzy +(((((((((any))))))))) + + +# Invalid searches [should error] + +^ +> +=== +cat or +index any +index any/wrong term +a prox/wrong b +() +(a +index any fish) +(cat any dog or ()) +sorry = (mike) -- 1.7.10.4