2 # the next line restats using tclsh \
5 # $Id: charconv.tcl,v 1.6 2004-03-20 07:16:25 adam Exp $
8 puts {charconv.tcl: [-p prefix] [-s split] [-o ofile] file ... }
12 proc preamble_trie {ofilehandle} {
15 set totype {unsigned short}
17 puts $f "\#include <string.h>"
19 struct yaz_iconv_trie_flat {
23 struct yaz_iconv_trie_dir {
28 struct yaz_iconv_trie {
29 struct yaz_iconv_trie_flat *flat;
30 struct yaz_iconv_trie_dir *dir;
34 static unsigned long lookup(struct yaz_iconv_trie **ptrs, int ptr, unsigned char *inp,
35 size_t inbytesleft, size_t *no_read)
37 struct yaz_iconv_trie *t = (ptr >= 0) ? ptrs[ptr] : 0;
38 if (!t || inbytesleft < 1)
42 size_t ch = inp[0] & 0xff;
44 lookup(ptrs, t->dir[ch].ptr, inp+1, inbytesleft-1, no_read);
59 struct yaz_iconv_trie_flat *flat = t->flat;
62 size_t len = strlen(flat->from);
63 if (len <= inbytesleft)
65 if (memcmp(flat->from, inp, len) == 0)
82 foreach x [array names trie] {
93 proc ins_trie {from to} {
95 if {![info exists trie(no)]} {
100 if {$trie(max) < $to} {
104 ins_trie_r [split $from] $to 0
107 proc split_trie {this} {
109 set trie($this,type) d
110 foreach e $trie($this,content) {
111 set from [lindex $e 0]
114 set ch [lindex $from 0]
115 set rest [lrange $from 1 end]
117 if {[llength $rest]} {
118 if {![info exist trie($this,ptr,$ch)]} {
119 set trie($this,ptr,$ch) $trie(no)
122 ins_trie_r $rest $to $trie($this,ptr,$ch)
124 set trie($this,to,$ch) $to
127 set trie($this,content) missing
130 proc ins_trie_r {from to this} {
133 if {![info exist trie($this,type)]} {
134 set trie($this,type) f
136 if {$trie($this,type) == "f"} {
137 lappend trie($this,content) [list $from $to]
140 if {[llength $trie($this,content)] > $trie(split)} {
142 return [ins_trie_r $from $to $this]
145 set ch [lindex $from 0]
146 set rest [lrange $from 1 end]
148 if {[llength $rest]} {
149 if {![info exist trie($this,ptr,$ch)]} {
150 set trie($this,ptr,$ch) $trie(no)
153 ins_trie_r $rest $to $trie($this,ptr,$ch)
155 set trie($this,to,$ch) $to
160 proc dump_trie {ofilehandle} {
165 puts $f "/* TRIE: size $trie(size) */"
168 while { [incr this -1] >= 0 } {
169 puts $f "/* PAGE $this */"
170 if {$trie($this,type) == "f"} {
171 puts $f "struct yaz_iconv_trie_flat $trie(prefix)page${this}_flat\[\] = \{"
172 foreach m $trie($this,content) {
173 puts -nonewline $f " \{\""
174 foreach d [lindex $m 0] {
175 puts -nonewline $f "\\x$d"
177 puts -nonewline $f "\", 0x[lindex $m 1]"
180 puts $f " \{\"\", 0\}"
182 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
183 puts $f " $trie(prefix)page${this}_flat, 0"
186 puts $f "struct yaz_iconv_trie_dir $trie(prefix)page${this}_dir\[256\] = \{"
187 for {set i 0} {$i < 256} {incr i} {
188 puts -nonewline $f " \{"
189 set ch [format %02X $i]
191 if {[info exist trie($this,ptr,$ch)]} {
192 puts -nonewline $f "$trie($this,ptr,$ch), "
195 puts -nonewline $f "-1, "
197 if {[info exist trie($this,to,$ch)]} {
198 puts -nonewline $f "0x$trie($this,to,$ch)\}"
201 puts -nonewline $f "0\}"
204 puts -nonewline $f " /* $ch */"
213 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
214 puts $f " 0, $trie(prefix)page${this}_dir"
219 puts $f "struct yaz_iconv_trie *$trie(prefix)ptrs \[\] = {"
220 for {set this 0} {$this < $trie(no)} {incr this} {
221 puts $f " &$trie(prefix)page$this,"
226 puts $f "unsigned long yaz_$trie(prefix)_conv
227 (unsigned char *inp, size_t inbytesleft, size_t *no_read)
231 code = lookup($trie(prefix)ptrs, 0, inp, inbytesleft, no_read);
242 proc readfile {fname ofilehandle prefix omits} {
248 set f [open $fname r]
252 set cnt [gets $f line]
256 if {[regexp {<entitymap>} $line s]} {
258 set trie(prefix) "${prefix}"
259 } elseif {[regexp {</entitymap>} $line s]} {
260 dump_trie $ofilehandle
261 } elseif {[regexp {<character hex="([^\"]*)".*<unientity>([0-9A-Fa-f]*)</unientity>} $line s hex ucs]} {
264 } elseif {[regexp {<codeTable number="([0-9]+)"} $line s tablenumber]} {
266 set trie(prefix) "${prefix}_$tablenumber"
267 } elseif {[regexp {</codeTable>} $line s]} {
268 if {[lsearch $omits $tablenumber] == -1} {
269 dump_trie $ofilehandle
271 } elseif {[regexp {</code>} $line s]} {
272 if {[string length $ucs]} {
273 for {set i 0} {$i < [string length $marc]} {incr i 2} {
274 lappend hex [string range $marc $i [expr $i+1]]
276 # puts "ins_trie $hex $ucs"
282 } elseif {[regexp {<marc>([0-9A-Fa-f]*)</marc>} $line s marc]} {
284 } elseif {[regexp {<ucs>([0-9A-Fa-f]*)</ucs>} $line s ucs]} {
296 set l [llength $argv]
300 set arg [lindex $argv $i]
301 switch -glob -- $arg {
306 if {[string length $arg]} {
307 set arg [lindex $argv [incr i]]
312 if {[string length $arg]} {
313 set arg [lindex $argv [incr i]]
318 if {[string length $arg]} {
319 set arg [lindex $argv [incr i]]
324 if {[string length $arg]} {
325 set arg [lindex $argv [incr i]]
335 if {![info exists ifiles]} {
336 puts "charconv.tcl: missing input file(s)"
340 set ofilehandle [open $ofile w]
341 preamble_trie $ofilehandle
343 foreach ifile $ifiles {
344 readfile $ifile $ofilehandle $prefix $omits