1 /* Copyright (C) 2006, Index Data ApS
2 * See the file LICENSE for details.
3 * $Id: nfa.h,v 1.2 2006-05-03 11:09:59 heikki Exp $
8 * \brief NFA for character set normalizing
10 * The NFA is a character mathcing device. It consists of states
11 * and transitions between them. Each transition has a condition, which
12 * is a range of values.
14 * When matching, we always start at the first state, and find the longest
15 * possible sequence of input characters that match the ranges in the
16 * conditions, and that leads into a terminal state.
23 #include <yaz/yconfig.h>
27 /** \brief Internal character type */
28 typedef unsigned int yaz_nfa_char;
31 /** \brief The NFA itself
32 * The internals are hidden in nfa.c */
33 typedef struct yaz_nfa yaz_nfa;
35 /** \brief A state of the NFA */
36 typedef struct yaz_nfa_state yaz_nfa_state;
38 /** \brief Transition from one state to another */
39 typedef struct yaz_nfa_transition yaz_nfa_transition;
42 /** \brief Initialize the NFA without any states in it
44 * \return a pointer to the newly created NFA
47 yaz_nfa *yaz_nfa_init();
49 /** \brief Destroy the whole thing */
51 yaz_nfa *n /** the nfa to destroy */
54 /** \brief Add a normal state to the NFA.
56 * The first state added will become the starting point.
57 * Returns a pointer to it, which you can safely ignore, or use in building
60 yaz_nfa_state *yaz_nfa_add_state(
61 yaz_nfa *n /** The NFA to add the state to */
65 /** \brief Sets the result pointer to a state
67 * Call with null to clear the pointer.
70 * \retval 1 The state already has a result!
72 int yaz_nfa_set_result(
75 /** The state to which the result is added */
77 /** The result. The NFA does not care what it is, just stores it */
81 /** \brief Gets the result pointer from a state
83 * \retval NULL if no result set
85 void *yaz_nfa_get_result(
86 yaz_nfa *n /** The NFA itself */,
87 yaz_nfa_state *s /** The state whose result you want */);
89 /** \brief Set the backref number to a state.
91 * Each state can be the beginning and/or ending of a backref
92 * sequence. This call sets those flags in the states. After matching,
93 * we can get hold of the backrefs that matched, and use them in our
94 * translations. The backrefs start at 1, not zero!
97 * \param s the state to add to
98 * \param backref_number is the number of the back reference. 0 for clearing
99 * \param is_start is 1 for start of the backref, 0 for end
101 * \retval 1 if the backref is already set
102 * \retval 2 for ending a backref that has not been started
106 int yaz_nfa_set_backref(yaz_nfa *n, yaz_nfa_state *s,
110 /** \brief Get the backref number of a state.
113 * \param s the state to add to
114 * \param is_start is 1 for start of the backref, 0 for end
115 * \return the backref number associated with the state, or 0 if none.
118 int yaz_nfa_get_backref(yaz_nfa *n, yaz_nfa_state *s,
121 /** \brief Add a transition to the NFA.
123 * Add a transition between two existing states. The condition
124 * is (as always) a range of yaz_nfa_chars.
126 * \param from_state which state the transition is from
127 * \param to_state where the transition goes to
128 * \param range_start is the beginning of the range of values
129 * \param range_end is the end of the range of values
131 void yaz_nfa_add_transition(yaz_nfa *n,
132 yaz_nfa_state *from_state,
133 yaz_nfa_state *to_state,
134 yaz_nfa_char range_start,
135 yaz_nfa_char range_end);
137 /** \brief Add an empty (epsilon) transition.
140 * \param from_state which state the transition is from
141 * \param to_state where the transition goes to
143 void yaz_nfa_add_empty_transition( yaz_nfa *n,
144 yaz_nfa_state *from_state,
145 yaz_nfa_state *to_state);
147 /** \brief Add a translation from a range to the NFA.
150 * \param st the state to add this to. If null, adds to the initial state
151 * \param range_start is the beginning of the range of values
152 * \param range_end is the end of the range of values
154 * Checks if we already have a transition like this. If so, does not add
155 * a new one, but returns the target state. Otherwise creates a new state,
156 * and a transition to it.
158 yaz_nfa_state *yaz_nfa_add_range( yaz_nfa *n,
160 yaz_nfa_char range_start,
161 yaz_nfa_char range_end );
163 /** \brief Add a sequence of transitions and states.
165 * Starting from state s (or from the initial state, if s is
166 * null), finds as much of seq as possible and inserts the rest.
167 * \Return the final state
169 yaz_nfa_state *yaz_nfa_add_sequence( yaz_nfa *n,
174 /** \brief Find the longest possible match.
176 * \param n the nfa itself
177 * \param inbuff buffer of input data. Will be incremented when match
178 * \param incharsleft max number of inchars to use from inbuff
179 * \param result the result pointer from the nfa (what ever that is)
181 * In case of errors, returns the best match so far,
182 * which the caller is free to ignore.
185 * \retval 1 no match found
186 * \retval 2 overrun'of input buffer
187 * \retval 3 looping too far
191 int yaz_nfa_match(yaz_nfa *n, yaz_nfa_char **inbuff, size_t incharsleft,
194 #define YAZ_NFA_SUCCESS 0
195 #define YAZ_NFA_NOMATCH 1
196 #define YAZ_NFA_OVERRUN 2
197 #define YAZ_NFA_LOOP 3
200 /** \brief Get the first state of the NFA.
204 * Useful for iterating through all states, probably calling get_result
205 * for each, and doing something to the results (freeing memory?)
207 * \returns a pointer to the first state, or NULL if none.
209 yaz_nfa_state *yaz_nfa_get_first(yaz_nfa *n);
211 /** \brief Get the next state of the NFA.
214 * \param s the state to add to
215 * \return the next state, or NULL if no more.
217 yaz_nfa_state *yaz_nfa_get_next(yaz_nfa *n, yaz_nfa_state *s);
219 /** \brief Dump the NFA into a file .
221 * \param F The file handle to dump into (null => stdout)
223 * \param strfunc can be used for converting the resultinfo a string.
225 * strfunc is a function like
226 * char *f( void *result);
227 * it takes the result, and converts into a printable string (which
228 * must be allocated somewhere by the caller). If the results are
229 * already printable, passing a null pointer here prints them with a %s
232 void yaz_nfa_dump(FILE *F, yaz_nfa *n, char *(*strfunc)(void *) );