struct icu_buf_utf16 *tkn16 = icu_buf_utf16_create(0);
struct icu_buf_utf8 *tkn8 = icu_buf_utf8_create(0);
struct icu_tokenizer *tokenizer = 0;
+ size_t org_start, org_len;
/* transforming to UTF16 */
icu_utf16_from_utf8_cstr(src16, src8cstr, &status);
icu_check_status(status);
/* perform work on tokens */
- while (icu_tokenizer_next_token(tokenizer, tkn16, &status))
+ while (icu_tokenizer_next_token(tokenizer, tkn16, &status,
+ &org_start, &org_len))
{
icu_check_status(status);
char print[1024];
int xmloutput;
int sortoutput;
+ int org_output;
yaz_icu_chain_t chain;
FILE * infile;
FILE * outfile;
" -c file XML configuration\n"
" -p a|c|l|t Print ICU info \n"
" -s Show sort normalization key\n"
+ " -o Show org positions\n"
" -x XML output instread of text\n"
"\n"
"Examples:\n"
p_config->chain = 0;
p_config->infile = 0;
p_config->outfile = stdout;
+ p_config->org_output = 0;
/* set up command line parameters */
- while ((ret = options("c:p:xs", argv, argc, &arg)) != -2)
+ while ((ret = options("c:op:sx", argv, argc, &arg)) != -2)
{
switch (ret)
{
case 'x':
p_config->xmloutput = 1;
break;
+ case 'o':
+ p_config->org_output = 1;
+ break;
case 0:
if (p_config->infile)
{
}
else
{
- fprintf(p_config->outfile, "%lu %lu '%s' '%s' %ld+%ld",
+ fprintf(p_config->outfile, "%lu %lu '%s' '%s'",
token_count,
line_count,
icu_chain_token_norm(p_config->chain),
- icu_chain_token_display(p_config->chain),
- (long) start,
- (long) len);
+ icu_chain_token_display(p_config->chain));
if (p_config->sortoutput)
{
fprintf(p_config->outfile, " '%s'", wrbuf_cstr(sw));
}
+ if (p_config->org_output)
+ {
+ fprintf(p_config->outfile, " %ld+%ld",
+ (long) start, (long) len);
+ }
fprintf(p_config->outfile, "\n");
}
}