X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=include%2Fyaz%2Fnfa.h;h=6880c701dce9d35561615f06336703333ef63e6a;hb=9a931f88892b2d8be796bfafa07e430307b7c5f6;hp=a61b5c1b3c4f5acacf5a4647d25e1ea746d1a78d;hpb=2b8a6e970b5ed4182a67d423ba4ba5dc74ffc6bb;p=yaz-moved-to-github.git diff --git a/include/yaz/nfa.h b/include/yaz/nfa.h index a61b5c1..6880c70 100644 --- a/include/yaz/nfa.h +++ b/include/yaz/nfa.h @@ -1,6 +1,6 @@ /* Copyright (C) 2006, Index Data ApS * See the file LICENSE for details. - * $Id: nfa.h,v 1.2 2006-05-03 11:09:59 heikki Exp $ + * $Id: nfa.h,v 1.6 2006-05-05 14:02:27 heikki Exp $ */ /** @@ -15,10 +15,17 @@ * possible sequence of input characters that match the ranges in the * conditions, and that leads into a terminal state. * + * Separate from this we have converters. Those can often be used + * together with a NFA (think match-pattern and replace-pattern). + * + * A converter is a routine that produces some output. It can translate a + * range of characters into another range, emit a constant string, or + * something like that. + * */ -#ifndef NFA_H -#define NFA_H +#ifndef YAZ_NFA_H +#define YAZ_NFA_H #include @@ -39,6 +46,11 @@ typedef struct yaz_nfa_state yaz_nfa_state; typedef struct yaz_nfa_transition yaz_nfa_transition; +/** \brief Simple character range converter */ +typedef struct yaz_nfa_converter yaz_nfa_converter; + + + /** \brief Initialize the NFA without any states in it * * \return a pointer to the newly created NFA @@ -86,12 +98,13 @@ void *yaz_nfa_get_result( yaz_nfa *n /** The NFA itself */, yaz_nfa_state *s /** The state whose result you want */); -/** \brief Set the backref number to a state. +/** \brief Set a backref point to a state. * - * Each state can be the beginning and/or ending of a backref - * sequence. This call sets those flags in the states. After matching, - * we can get hold of the backrefs that matched, and use them in our - * translations. The backrefs start at 1, not zero! 
+ * Each state can be the beginning and/or ending point of a backref + * sequence. This call sets one of those flags in the state. After + * matching, we can get hold of the backrefs that matched, and use + * them in our translations. The numbering of backrefs starts at 1, + * not zero! * * \param n the nfa * \param s the state to add to @@ -103,11 +116,11 @@ void *yaz_nfa_get_result( * */ -int yaz_nfa_set_backref(yaz_nfa *n, yaz_nfa_state *s, +int yaz_nfa_set_backref_point(yaz_nfa *n, yaz_nfa_state *s, int backref_number, int is_start ); -/** \brief Get the backref number of a state. +/** \brief Get the backref point of a state * * \param n the nfa * \param s the state to add to @@ -115,7 +128,7 @@ int yaz_nfa_set_backref(yaz_nfa *n, yaz_nfa_state *s, * \return the backref number associated with the state, or 0 if none. */ -int yaz_nfa_get_backref(yaz_nfa *n, yaz_nfa_state *s, +int yaz_nfa_get_backref_point(yaz_nfa *n, yaz_nfa_state *s, int is_start ); /** \brief Add a transition to the NFA. @@ -123,7 +136,7 @@ int yaz_nfa_get_backref(yaz_nfa *n, yaz_nfa_state *s, * Add a transition between two existing states. The condition * is (as always) a range of yaz_nfa_chars. * \param n the nfa - * \param from_state which state the transition is from + * \param from_state which state the transition is from. null=initial * \param to_state where the transition goes to * \param range_start is the beginning of the range of values * \param range_end is the end of the range of values @@ -175,7 +188,7 @@ yaz_nfa_state *yaz_nfa_add_sequence( yaz_nfa *n, * * \param n the nfa itself * \param inbuff buffer of input data. Will be incremented when match - * \param incharsleft max number of inchars to use from inbuff + * \param incharsleft max number of inchars to use from inbuff. decrements. 
* \param result the result pointer from the nfa (what ever that is) * * In case of errors, returns the best match so far, @@ -188,14 +201,116 @@ yaz_nfa_state *yaz_nfa_add_sequence( yaz_nfa *n, * */ -int yaz_nfa_match(yaz_nfa *n, yaz_nfa_char **inbuff, size_t incharsleft, +int yaz_nfa_match(yaz_nfa *n, yaz_nfa_char **inbuff, size_t *incharsleft, void **result ); +/** yaz_nfa_match return codes */ #define YAZ_NFA_SUCCESS 0 #define YAZ_NFA_NOMATCH 1 #define YAZ_NFA_OVERRUN 2 #define YAZ_NFA_LOOP 3 +/** \brief Get a back reference after a successful match. + * + * \param n the nfa + * \param backref_no the number of the backref to get + * \param start beginning of the matching substring + * \param end end of the matching substring + * + * Returns pointers to the beginning and end of a backref, or null + * pointers if one endpoint was not met. Those pointers point to the + * original buffer that was matched, so the caller will not have to + * worry about freeing anything special. + * + * It is technically possible to create NFAs that meet the start but + * not the end of a backref. It is up to the caller to decide how + * to handle such a situation. + * + * \retval 0 OK + * \retval 1 no match + * \retval 2 no such backref + */ + +int yaz_nfa_get_backref( yaz_nfa *n, + int backref_no, + yaz_nfa_char **start, + yaz_nfa_char **end ); + +/** \brief Create a string converter. 
+ * \param n the nfa + * \param string the string to output + * \param length how many chars in the string + * + * This converter produces a constant string in the output + */ +yaz_nfa_converter *yaz_nfa_create_string_converter ( + yaz_nfa *n, + yaz_nfa_char *string, + size_t length ); + +/** \brief Create a backref converter + * \param n the nfa + * \param backref_no The backreference to reproduce + * + * This converter copies a backref into the output buffer + */ +yaz_nfa_converter *yaz_nfa_create_backref_converter ( + yaz_nfa *n, int backref_no ); + + +/** \brief Create a character range converter + * \param n the nfa + * \param backref_no The backreference to reproduce + * \param from_char the first character of the original range + * \param to_char the first character of the target range + * + * This converter takes a backreference, and shifts the characters + * by a constant value. For example, translating a-z to A-Z. + * Note that backref 0 is always the last character that matched a + * range, even if no backrefs were defined in the nfa. This makes + * it pretty useful with this converter. + * + */ +yaz_nfa_converter *yaz_nfa_create_range_converter ( + yaz_nfa *n, int backref_no, + yaz_nfa_char from_char, + yaz_nfa_char to_char); + + +/** \brief Connects converters in a chain. + * \param n the nfa (mostly for nmem access) + * \param startpoint the first converter in the chain + * \param newconverter the converter to place at the end of the chain + * + * Places the new converter at the end of the chain that starts from + * startpoint. + * + */ +void yaz_nfa_append_converter ( + yaz_nfa *n, + yaz_nfa_converter *startpoint, + yaz_nfa_converter *newconverter); + +/** \brief Runs the chain of converters. + * \param n the nfa (mostly for nmem access) + * \param c the first converter in a chain + * \param outbuff buffer to write the output in. Increments the ptr. 
+ * \param outcharsleft how many characters we may write + * + * Runs the converters in the chain, placing output into outbuff + * (and incrementing the pointer). + * + * \retval 0 OK + * \retval 1 no match to get backrefs from + * \retval 2 no room in outbuff + * + */ +int yaz_nfa_run_converters( + yaz_nfa *n, + yaz_nfa_converter *c, + yaz_nfa_char **outbuff, + size_t *outcharsleft); + /** \brief Get the first state of the NFA. * @@ -232,6 +347,9 @@ yaz_nfa_state *yaz_nfa_get_next(yaz_nfa *n, yaz_nfa_state *s); void yaz_nfa_dump(FILE *F, yaz_nfa *n, char *(*strfunc)(void *) ); + + + YAZ_END_CDECL #endif