2 package org.z3950.zing.cql;
3 import java.util.Properties;
4 import java.util.Random;
5 import java.io.InputStream;
6 import java.io.FileInputStream;
7 import java.io.FileNotFoundException;
11 * A generator that produces random CQL queries.
13 * Why is that useful? Mainly to produce test-cases for CQL parsers
14 * (including the <TT>CQLParser</TT> class in this package): you can
15 * generate a random search tree, render it to XCQL and remember the
16 * result. Then decompile the tree to CQL, feed the generated CQL to
17 * the parser of your choice, and check that the XCQL it comes up with
18 * is the same as what you got from your initial rendering.
20 * This code is based on the same grammar as the <TT>CQLParser</TT> class in
21 * this distribution - there is a <TT>generate_<I>x</I>()</TT> method
22 * for each grammar element <I>X</I>.
24 * @see <A href="http://zing.z3950.org/cql/index.html"
25 * >http://zing.z3950.org/cql/index.html</A>
27 public class CQLGenerator {
// Configuration table; maybe() reads probability values from it by name.
28 private Properties params;
// Debug switch, initialised to off.
// NOTE(review): no read of this flag is visible in this extract -
// presumably debug() consults it; confirm.
30 static private boolean DEBUG = false;
33 * Creates a new CQL generator with the specified parameters.
36 * A <TT>Properties</TT> table containing configuration
37 * parameters for the queries to be generated by this generator.
38 * Recognised parameters are:
41 * <DT><TT>seed</TT></DT>
43 * If specified, this is a <TT>long</TT> used to seed the
44 * random number generator, so that the CQL generator can be
45 * run repeatably, giving the same results each time. If it's
46 * omitted, then no seed is explicitly specified, and the
47 * results of each run will be different (so long as you don't
48 * run it more than 2^32 times :-)
51 * <DT><TT>complexQuery</TT></DT>
53 * [mandatory] A floating-point number between 0.0 and 1.0,
54 * indicating the probability for each <TT>cql-query</TT> node
55 * that it will be expanded into a ``complex query''
56 * (<TT>cql-query boolean search-clause</TT>) rather
57 * than a <TT>search-clause</TT>.
60 * <DT><TT>complexClause</TT></DT>
62 * [mandatory] A floating-point number between 0.0 and 1.0,
63 * indicating the probability for each <TT>search-clause</TT>
64 * node that it will be expanded into a full sub-query rather
65 * than an <TT>[ index relation ] term</TT> triplet.
68 * <DT><TT>proxOp</TT></DT>
70 * [mandatory] A floating-point number between 0.0 and 1.0,
71 * indicating the probability that each boolean operator will
72 * be chosen to be a proximity operation; otherwise, the three
73 * simpler boolean operations (<TT>and</TT>, <TT>or</TT> and
74 * <TT>not</TT>) are chosen with equal probability.
77 * <DT><TT>equalsRelation</TT></DT>
79 * [mandatory] A floating-point number between 0.0 and 1.0,
80 * indicating the probability that each relation will be chosen
81 * to be <TT>=</TT> - this is treated as a special case, since
82 * it's likely to be by far the most common relation in
83 * ``real life'' searches.
86 * <DT><TT>numericRelation</TT></DT>
88 * [mandatory] A floating-point number between 0.0 and 1.0,
89 * indicating the probability that a relation, having chosen
90 * not to be <TT>=</TT>, is instead chosen to be one of the six
91 * numeric relations (<TT>&lt;</TT>, <TT>&gt;</TT>,
92 * <TT>&lt;=</TT>, <TT>&gt;=</TT>, <TT>&lt;&gt;</TT> and
// Builds a generator from the supplied configuration table.
// NOTE(review): some lines of this constructor are not visible in this
// extract (e.g. whatever stores params and handles an absent seed).
98 public CQLGenerator(Properties params) {
// Optional "seed" property; getProperty() yields null when it is unset.
100 String seed = params.getProperty("seed");
// Seeds the random source with the property value parsed as a long.
// NOTE(review): presumably guarded by a null check on seed that is not
// visible here - confirm; parsing a null string throws
// NumberFormatException.
102 rnd = new Random(new Long(seed).longValue());
// Writes a diagnostic message, prefixed with "DEBUG: ", to standard error.
// NOTE(review): presumably only when DEBUG is set - any such guard is not
// visible in this extract; confirm.
107 private static void debug(String str) {
109 System.err.println("DEBUG: " + str);
113 * Generates a single random CQL query.
115 * Uses the parameters that were associated with the generator
116 * when it was created. You are free to create as many random
117 * queries as you wish from a single generator; each of them will
118 * use the same parameters.
121 * A <TT>CQLNode</TT> that is the root of the generated tree.
122 * That tree may be rendered in XCQL using its <TT>toXCQL()</TT>
123 * method, or decompiled into CQL using its <TT>toCQL</TT>
// Public entry point: delegates to generate_cql_query() and returns the
// root node it produces. A MissingParameterException raised while looking
// up a probability parameter propagates to the caller.
126 public CQLNode generate() throws MissingParameterException {
127 return generate_cql_query();
// Expands a cql-query node: either a lone search-clause, or a boolean node
// joining a nested cql-query (left) with a search-clause (right).
130 private CQLNode generate_cql_query() throws MissingParameterException {
// When the "complexQuery" coin-toss fails, stop recursing here.
131 if (!maybe("complexQuery")) {
132 return generate_search_clause();
// Both operands are generated up front; the left one recurses into this
// method, so its depth is governed by the same "complexQuery" probability.
135 CQLNode node1 = generate_cql_query();
136 CQLNode node2 = generate_search_clause();
137 // ### should generate prefix-mapping nodes
138 if (maybe("proxOp")) {
139 // ### generate proximity nodes
// Chooses and/or/not with equal probability.
// NOTE(review): presumably the else-branch of the "proxOp" test - the
// line in between is not visible in this extract; confirm.
141 switch (rnd.nextInt(3)) {
142 case 0: return new CQLAndNode(node1, node2, new ModifierSet("and"));
143 case 1: return new CQLOrNode (node1, node2, new ModifierSet("or"));
144 case 2: return new CQLNotNode(node1, node2, new ModifierSet("not"));
// Fallback when none of the cases above returned: node1 and node2 are
// dropped and a freshly generated search clause is returned instead.
148 return generate_search_clause();
// Expands a search-clause node: either a nested cql-query or a single
// index/relation/term node.
151 private CQLNode generate_search_clause() throws MissingParameterException {
// When the "complexClause" coin-toss succeeds, recurse into a full query.
152 if (maybe("complexClause")) {
153 return generate_cql_query();
156 // ### Should sometimes generate index/relation-free terms
// The three parts are drawn in this order, which fixes the order in which
// random numbers are consumed from rnd.
157 String index = generate_index();
158 CQLRelation relation = generate_relation();
159 String term = generate_term();
161 return new CQLTermNode(index, relation, term);
164 // ### Should probably be more configurable
// Picks an index name at random from two hard-coded groups.
// NOTE(review): the statement returning the chosen name is not visible in
// this extract.
165 private String generate_index() {
166 String index = ""; // shut up compiler warning
// First draw selects the group; a zero selects the three dc.* names.
167 if (rnd.nextInt(2) == 0) {
168 switch (rnd.nextInt(3)) {
169 case 0: index = "dc.author"; break;
170 case 1: index = "dc.title"; break;
171 case 2: index = "dc.subject"; break;
// Second group: three bath.* names plus one name containing '>'.
// NOTE(review): presumably the else-branch of the test above - the
// intervening lines are not visible; confirm.
174 switch (rnd.nextInt(4)) {
175 case 0: index = "bath.author"; break;
176 case 1: index = "bath.title"; break;
177 case 2: index = "bath.subject"; break;
178 case 3: index = "foo>bar"; break;
// Builds a CQLRelation around a randomly chosen base relation string; no
// modifiers are attached yet (see the ### note below).
// NOTE(review): the return statement is not visible in this extract -
// presumably it returns rel; confirm.
185 private CQLRelation generate_relation() throws MissingParameterException {
186 String base = generate_base_relation();
187 CQLRelation rel = new CQLRelation(base);
188 // ### should generate modifiers too
// Chooses the relation string using two successive coin-tosses
// ("equalsRelation", then "numericRelation"), falling back to one of three
// word relations chosen with equal probability.
192 private String generate_base_relation() throws MissingParameterException {
// NOTE(review): the body of this first branch is not visible in this
// extract; per the constructor documentation it should yield "=".
193 if (maybe("equalsRelation")) {
195 } else if (maybe("numericRelation")) {
196 return generate_numeric_relation();
// Neither coin-toss succeeded: pick a word relation.
198 switch (rnd.nextInt(3)) {
199 case 0: return "within";
200 case 1: return "all";
201 case 2: return "any";
206 return ""; // shut up compiler warning
209 // ### could read candidate terms from /usr/dict/words
210 // ### should introduce wildcard characters
211 // ### should generate multi-word terms
// Returns one of a fixed set of sample terms, selected by a draw in 0..9.
// The samples include quoted, dotted, colon-separated, angle-bracketed,
// slash-separated, multi-word and wildcard-bearing strings.
// NOTE(review): the arm for case 6 is not visible in this extract.
212 private String generate_term() {
213 switch (rnd.nextInt(10)) {
214 case 0: return "cat";
215 case 1: return "\"cat\"";
216 case 2: return "comp.os.linux";
217 case 3: return "xml:element";
218 case 4: return "<xml.element>";
219 case 5: return "prox/word/>=/5";
221 case 7: return "frog fish";
222 case 8: return "the complete dinosaur";
223 case 9: return "foo*bar";
227 return ""; // shut up compiler warning
// Returns a relation string selected by a draw in 0..5.
// NOTE(review): none of the case arms are visible in this extract, so the
// actual strings returned cannot be confirmed from here.
230 private String generate_numeric_relation() {
231 switch (rnd.nextInt(6)) {
241 return ""; // shut up compiler warning
244 boolean maybe(String param) throws MissingParameterException {
245 String probability = params.getProperty(param);
246 if (probability == null)
247 throw new MissingParameterException(param);
249 double dice = rnd.nextDouble();
250 double threshhold = new Double(probability).doubleValue();
251 boolean res = dice < threshhold;
252 debug("dice=" + String.valueOf(dice).substring(0, 8) +
253 " vs. " + threshhold + "='" + param + "': " + res);
259 * A simple test-harness for the generator.
261 * It generates a single random query using the parameters
262 * specified in a nominated properties file, plus any additional
263 * <I>name value</I> pairs provided on the command-line, and
264 * decompiles it into CQL which is written to standard output.
267 * <TT>java org.z3950.zing.cql.CQLGenerator
268 * etc/generate.properties seed 18398</TT>,
269 * where the file <TT>generate.properties</TT> contains:<PRE>
273 * numericRelation=0.7
277 * ((dc.author = "&lt;xml.element&gt;") or (bath.title = cat)) and
278 * (dc.subject &gt;= "the complete dinosaur")
282 * The name of a properties file from which to read the
283 * configuration parameters (see above).
285 * The name of a configuration parameter.
287 * The value to assign to the configuration parameter named in
288 * the immediately preceding command-line argument.
290 * A CQL query expressed in a form that should be comprehensible
291 * to all conformant CQL compilers.
// Command-line harness: builds a generator from a properties file plus
// optional name/value overrides, generates one query and prints its CQL
// form on standard output.
293 public static void main (String[] args) throws Exception {
// Valid invocations have an odd argument count: the properties file
// followed by zero or more complete name/value pairs.
294 if (args.length % 2 != 1) {
295 System.err.println("Usage: CQLGenerator <props-file> "+
296 "[<name> <value>]...");
// NOTE(review): whatever follows the usage message (presumably an exit)
// is not visible in this extract.
300 String configFile = args[0];
301 InputStream f = new FileInputStream(configFile);
// NOTE(review): the condition guarding this throw is not visible here.
// The FileInputStream constructor already throws FileNotFoundException
// itself when the file cannot be opened.
303 throw new FileNotFoundException(configFile);
305 Properties params = new Properties();
// NOTE(review): loading the stream into params (and closing it) is
// presumably done on lines not visible in this extract - confirm.
// Command-line pairs are set on the table one by one; the odd-count check
// above guarantees args[i+1] exists for every i visited.
308 for (int i = 1; i < args.length; i += 2)
309 params.setProperty(args[i], args[i+1]);
311 CQLGenerator generator = new CQLGenerator(params);
312 CQLNode tree = generator.generate();
313 System.out.println(tree.toCQL());