1 // $Id: CQLGenerator.java,v 1.9 2007-07-03 15:41:35 mike Exp $
3 package org.z3950.zing.cql;
4 import java.util.Properties;
5 import java.util.Random;
6 import java.io.InputStream;
7 import java.io.FileInputStream;
8 import java.io.FileNotFoundException;
12 * A generator that produces random CQL queries.
14 * Why is that useful? Mainly to produce test-cases for CQL parsers
15 * (including the <TT>CQLParser</TT> class in this package): you can
16 * generate a random search tree, render it to XCQL and remember the
17 * result. Then decompile the tree to CQL, feed the generated CQL to
18 * the parser of your choice, and check that the XCQL it comes up with
19 * is the same as what you got from your initial rendering.
21 * This code is based on the same grammar as the <TT>CQLParser</TT> class in
22 * this distribution - there is a <TT>generate_<I>x</I>()</TT> method
23 * for each grammar element <I>X</I>.
25 * @version $Id: CQLGenerator.java,v 1.9 2007-07-03 15:41:35 mike Exp $
26 * @see <A href="http://zing.z3950.org/cql/index.html"
27 * >http://zing.z3950.org/cql/index.html</A>
29 public class CQLGenerator {
32 static private boolean DEBUG = false;
35 * Creates a new CQL generator with the specified parameters.
38 * A <TT>Properties</TT> table containing configuration
39 * parameters for the queries to be generated by this generator.
40 * Recognised parameters are:
43 * <DT><TT>seed</TT></DT>
45 * If specified, this is a <TT>long</TT> used to seed the
46 * random number generator, so that the CQL generator can be
47 * run repeatably, giving the same results each time. If it's
48 * omitted, then no seed is explicitly specified, and the
49 * results of each run will be different (so long as you don't
50 * run it more than 2^32 times :-)
53 * <DT><TT>complexQuery</TT></DT>
55 * [mandatory] A floating-point number between 0.0 and 1.0,
56 * indicating the probability for each <TT>cql-query</TT> node
57 * that it will be expanded into a ``complex query''
58 * (<TT>cql-query boolean search-clause</TT>) rather
59 * than a <TT>search-clause</TT>.
62 * <DT><TT>complexClause</TT></DT>
64 * [mandatory] A floating-point number between 0.0 and 1.0,
65 * indicating the probability for each <TT>search-clause</TT>
66 * node that it will be expanded into a full sub-query rather
67 * than an <TT>[ index relation ] term</TT> triplet.
70 * <DT><TT>proxOp</TT></DT>
72 * [mandatory] A floating-point number between 0.0 and 1.0,
73 * indicating the probability that each boolean operator will
74 * be chosen to be a proximity operation; otherwise, the three
75 * simpler boolean operations (<TT>and</TT>, <TT>or</TT> and
76 * <TT>not</TT>) are chosen with equal probability.
79 * <DT><TT>equalsRelation</TT></DT>
81 * [mandatory] A floating-point number between 0.0 and 1.0,
82 * indicating the probability that each relation will be chosen
83 * to be <TT>=</TT> - this is treated as a special case, since
84 * it's likely to be by far the most common relation in
85 * ``real life'' searches.
88 * <DT><TT>numericRelation</TT></DT>
90 * [mandatory] A floating-point number between 0.0 and 1.0,
91 * indicating the probability that a relation, having chosen
92 * not to be <TT>=</TT>, is instead chosen to be one of the six
93 * numeric relations (<TT><</TT>, <TT>></TT>,
94 * <TT><=</TT>, <TT>>=</TT>, <TT><></TT> and
100 public CQLGenerator(Properties params) {
101 this.params = params;
102 String seed = params.getProperty("seed");
104 rnd = new Random(new Long(seed).longValue());
109 private static void debug(String str) {
111 System.err.println("DEBUG: " + str);
115 * Generates a single random CQL query.
117 * Uses the parameters that were associated with the generator
118 * when it was created. You are free to create as many random
119 * queries as you wish from a single generator; each of them will
120 * use the same parameters.
123 * A <TT>CQLNode</TT> that is the root of the generated tree.
124 * That tree may be rendered in XCQL using its <TT>toXCQL()</TT>
125 * method, or decompiled into CQL using its <TT>toCQL</TT>
128 public CQLNode generate() throws MissingParameterException {
129 return generate_cql_query();
132 private CQLNode generate_cql_query() throws MissingParameterException {
133 if (!maybe("complexQuery")) {
134 return generate_search_clause();
137 CQLNode node1 = generate_cql_query();
138 CQLNode node2 = generate_search_clause();
139 // ### should generate prefix-mapping nodes
140 if (maybe("proxOp")) {
141 // ### generate proximity nodes
143 switch (rnd.nextInt(3)) {
144 case 0: return new CQLAndNode(node1, node2, new ModifierSet("and"));
145 case 1: return new CQLOrNode (node1, node2, new ModifierSet("or"));
146 case 2: return new CQLNotNode(node1, node2, new ModifierSet("not"));
150 return generate_search_clause();
153 private CQLNode generate_search_clause() throws MissingParameterException {
154 if (maybe("complexClause")) {
155 return generate_cql_query();
158 // ### Should sometimes generate index/relation-free terms
159 String index = generate_index();
160 CQLRelation relation = generate_relation();
161 String term = generate_term();
163 return new CQLTermNode(index, relation, term);
166 // ### Should probably be more configurable
167 private String generate_index() {
168 String index = ""; // shut up compiler warning
169 if (rnd.nextInt(2) == 0) {
170 switch (rnd.nextInt(3)) {
171 case 0: index = "dc.author"; break;
172 case 1: index = "dc.title"; break;
173 case 2: index = "dc.subject"; break;
176 switch (rnd.nextInt(4)) {
177 case 0: index = "bath.author"; break;
178 case 1: index = "bath.title"; break;
179 case 2: index = "bath.subject"; break;
180 case 3: index = "foo>bar"; break;
187 private CQLRelation generate_relation() throws MissingParameterException {
188 String base = generate_base_relation();
189 CQLRelation rel = new CQLRelation(base);
190 // ### should generate modifiers too
194 private String generate_base_relation() throws MissingParameterException {
195 if (maybe("equalsRelation")) {
197 } else if (maybe("numericRelation")) {
198 return generate_numeric_relation();
200 switch (rnd.nextInt(3)) {
201 case 0: return "within";
202 case 1: return "all";
203 case 2: return "any";
208 return ""; // shut up compiler warning
211 // ### could read candidate terms from /usr/dict/words
212 // ### should introduce wildcard characters
213 // ### should generate multi-word terms
214 private String generate_term() {
215 switch (rnd.nextInt(10)) {
216 case 0: return "cat";
217 case 1: return "\"cat\"";
218 case 2: return "comp.os.linux";
219 case 3: return "xml:element";
220 case 4: return "<xml.element>";
221 case 5: return "prox/word/>=/5";
223 case 7: return "frog fish";
224 case 8: return "the complete dinosaur";
225 case 9: return "foo*bar";
229 return ""; // shut up compiler warning
232 private String generate_numeric_relation() {
233 switch (rnd.nextInt(6)) {
243 return ""; // shut up compiler warning
246 boolean maybe(String param) throws MissingParameterException {
247 String probability = params.getProperty(param);
248 if (probability == null)
249 throw new MissingParameterException(param);
251 double dice = rnd.nextDouble();
252 double threshhold = new Double(probability).doubleValue();
253 boolean res = dice < threshhold;
254 debug("dice=" + String.valueOf(dice).substring(0, 8) +
255 " vs. " + threshhold + "='" + param + "': " + res);
261 * A simple test-harness for the generator.
263 * It generates a single random query using the parameters
264 * specified in a nominated properties file, plus any additional
265 * <I>name value</I> pairs provided on the command-line, and
266 * decompiles it into CQL which is written to standard output.
269 * <TT>java org.z3950.zing.cql.CQLGenerator
270 * etc/generate.properties seed 18398</TT>,
271 * where the file <TT>generate.properties</TT> contains:<PRE>
275 * numericRelation=0.7
279 * ((dc.author = "<xml.element>") or (bath.title = cat)) and
280 * (dc.subject >= "the complete dinosaur")
284 * The name of a properties file from which to read the
285 * configuration parameters (see above).
287 * The name of a configuration parameter.
289 * The value to assign to the configuration parameter named in
290 * the immediately preceding command-line argument.
292 * A CQL query expressed in a form that should be comprehensible
293 * to all conformant CQL compilers.
295 public static void main (String[] args) throws Exception {
296 if (args.length % 2 != 1) {
297 System.err.println("Usage: CQLGenerator <props-file> "+
298 "[<name> <value>]...");
302 String configFile = args[0];
303 InputStream f = new FileInputStream(configFile);
305 throw new FileNotFoundException(configFile);
307 Properties params = new Properties();
310 for (int i = 1; i < args.length; i += 2)
311 params.setProperty(args[i], args[i+1]);
313 CQLGenerator generator = new CQLGenerator(params);
314 CQLNode tree = generator.generate();
315 System.out.println(tree.toCQL());