+static void extract_add_index_string(RecWord *p, zinfo_index_category_t cat,
+ const char *str, int length)
+{
+ struct it_key key;
+ ZebraHandle zh = p->extractCtrl->handle;
+ ZebraExplainInfo zei = zh->reg->zei;
+ int ch, i;
+
+ ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name);
+ if (ch < 0)
+ ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name);
+
+ i = 0;
+ key.mem[i++] = ch;
+ key.mem[i++] = p->record_id;
+ key.mem[i++] = p->section_id;
+
+ if (zh->m_segment_indexing)
+ key.mem[i++] = p->segment;
+ key.mem[i++] = p->seqno;
+ key.len = i;
+
+ zebra_rec_keys_write(zh->reg->keys, str, length, &key);
+}
+
+static void extract_add_sort_string(RecWord *p, const char *str, int length)
+{
+ struct it_key key;
+ ZebraHandle zh = p->extractCtrl->handle;
+ ZebraExplainInfo zei = zh->reg->zei;
+ int ch;
+ zinfo_index_category_t cat = zinfo_index_category_sort;
+
+ ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name);
+ if (ch < 0)
+ ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name);
+ key.len = 3;
+ key.mem[0] = ch;
+ key.mem[1] = p->record_id;
+ key.mem[2] = p->section_id;
+
+ zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
+}
+
+static void extract_add_staticrank_string(RecWord *p,
+ const char *str, int length)
+{
+ char valz[40];
+ struct recExtractCtrl *ctrl = p->extractCtrl;
+
+ if (length > sizeof(valz)-1)
+ length = sizeof(valz)-1;
+
+ memcpy(valz, str, length);
+ valz[length] = '\0';
+ ctrl->staticrank = atozint(valz);
+}
+
+static void extract_add_string(RecWord *p, zebra_map_t zm,
+ const char *string, int length)
+{
+ assert(length > 0);
+
+ if (!p->index_name)
+ return;
+ if (log_level_details)
+ {
+
+ WRBUF w = wrbuf_alloc();
+
+ wrbuf_write_escaped(w, string, length);
+ yaz_log(log_level_details, "extract_add_string: %s", wrbuf_cstr(w));
+ wrbuf_destroy(w);
+ }
+ if (zebra_maps_is_index(zm))
+ {
+ extract_add_index_string(p, zinfo_index_category_index,
+ string, length);
+ if (zebra_maps_is_alwaysmatches(zm))
+ {
+ RecWord word;
+ memcpy(&word, p, sizeof(word));
+
+ word.seqno = 1;
+ extract_add_index_string(
+ &word, zinfo_index_category_alwaysmatches, "", 0);
+ }
+ }
+ else if (zebra_maps_is_sort(zm))
+ {
+ extract_add_sort_string(p, string, length);
+ }
+ else if (zebra_maps_is_staticrank(zm))
+ {
+ extract_add_staticrank_string(p, string, length);
+ }
+}
+
+static void extract_add_incomplete_field(RecWord *p, zebra_map_t zm)
+{
+ const char *b = p->term_buf;
+ int remain = p->term_len;
+ int first = 1;
+ const char **map = 0;
+
+ if (remain > 0)
+ map = zebra_maps_input(zm, &b, remain, 0);
+
+ while (map)
+ {
+ char buf[IT_MAX_WORD+1];
+ int i, remain;
+
+ /* Skip spaces */
+ while (map && *map && **map == *CHR_SPACE)
+ {
+ remain = p->term_len - (b - p->term_buf);
+ if (remain > 0)
+ map = zebra_maps_input(zm, &b, remain, 0);
+ else
+ map = 0;
+ }
+ if (!map)
+ break;
+ i = 0;
+ while (map && *map && **map != *CHR_SPACE)
+ {
+ const char *cp = *map;
+
+ while (i < IT_MAX_WORD && *cp)
+ buf[i++] = *(cp++);
+ remain = p->term_len - (b - p->term_buf);
+ if (remain > 0)
+ map = zebra_maps_input(zm, &b, remain, 0);
+ else
+ map = 0;
+ }
+ if (!i)
+ return;
+
+ if (first)
+ {
+ first = 0;
+ if (zebra_maps_is_first_in_field(zm))
+ {
+ /* first in field marker */
+ extract_add_string(p, zm, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN);
+ p->seqno++;
+ }
+ }
+ extract_add_string(p, zm, buf, i);
+ p->seqno++;
+ }
+}
+
+static void extract_add_complete_field(RecWord *p, zebra_map_t zm)
+{
+ const char *b = p->term_buf;
+ char buf[IT_MAX_WORD+1];
+ const char **map = 0;
+ int i = 0, remain = p->term_len;
+
+ if (remain > 0)
+ map = zebra_maps_input(zm, &b, remain, 1);
+
+ while (remain > 0 && i < IT_MAX_WORD)
+ {
+ while (map && *map && **map == *CHR_SPACE)
+ {
+ remain = p->term_len - (b - p->term_buf);
+
+ if (remain > 0)
+ {
+ int first = i ? 0 : 1; /* first position */
+ map = zebra_maps_input(zm, &b, remain, first);
+ }
+ else
+ map = 0;
+ }
+ if (!map)
+ break;
+
+ if (i && i < IT_MAX_WORD)
+ buf[i++] = *CHR_SPACE;
+ while (map && *map && **map != *CHR_SPACE)
+ {
+ const char *cp = *map;
+
+ if (**map == *CHR_CUT)
+ {
+ i = 0;
+ }
+ else
+ {
+ if (i >= IT_MAX_WORD)
+ break;
+ while (i < IT_MAX_WORD && *cp)
+ buf[i++] = *(cp++);
+ }
+ remain = p->term_len - (b - p->term_buf);
+ if (remain > 0)
+ {
+ map = zebra_maps_input(zm, &b, remain, 0);
+ }
+ else
+ map = 0;
+ }
+ }
+ if (!i)
+ return;
+ extract_add_string(p, zm, buf, i);
+ p->seqno++;
+}
+
+static void extract_add_icu(RecWord *p, zebra_map_t zm)
+{
+ const char *res_buf = 0;
+ size_t res_len = 0;
+
+ zebra_map_tokenize_start(zm, p->term_buf, p->term_len);
+ while (zebra_map_tokenize_next(zm, &res_buf, &res_len, 0, 0))
+ {
+ if (res_len > IT_MAX_WORD)
+ {
+ yaz_log(YLOG_LOG, "Truncating long term %ld", (long) res_len);
+ res_len = IT_MAX_WORD;
+ }
+ extract_add_string(p, zm, res_buf, res_len);
+ p->seqno++;
+ }
+}
+
+
/** \brief top-level indexing handler for recctrl system
    \param p token data to be indexed

    Call sequence:
    extract_token_add
      extract_add_{in}complete_field / extract_add_icu
        extract_add_string

          extract_add_index_string
          or
          extract_add_sort_string
          or
          extract_add_staticrank_string

*/
+static void extract_token_add(RecWord *p)
+{
+ ZebraHandle zh = p->extractCtrl->handle;
+ zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, p->index_type);
+
+ if (log_level_details)
+ {
+ yaz_log(log_level_details, "extract_token_add "
+ "type=%s index=%s seqno=" ZINT_FORMAT " s=%.*s",
+ p->index_type, p->index_name,
+ p->seqno, p->term_len, p->term_buf);
+ }
+ if (zebra_maps_is_icu(zm))
+ {
+ extract_add_icu(p, zm);
+ }