2 # the next line restats using tclsh \
5 # $Id: charconv.tcl,v 1.12 2006-04-19 23:15:39 adam Exp $
8 puts {charconv.tcl: [-p prefix] [-s split] [-o ofile] file ... }
12 proc preamble_trie {ofilehandle} {
15 set totype {unsigned }
17 puts $f "\#include <string.h>"
19 struct yaz_iconv_trie_flat {
21 unsigned combining : 1;
24 struct yaz_iconv_trie_dir {
26 unsigned combining : 1;
30 struct yaz_iconv_trie {
31 struct yaz_iconv_trie_flat *flat;
32 struct yaz_iconv_trie_dir *dir;
36 static unsigned long lookup(struct yaz_iconv_trie **ptrs, int ptr, unsigned char *inp,
37 size_t inbytesleft, size_t *no_read, int *combining)
39 struct yaz_iconv_trie *t = (ptr > 0) ? ptrs[ptr-1] : 0;
40 if (!t || inbytesleft < 1)
44 size_t ch = inp[0] & 0xff;
46 lookup(ptrs, t->dir[ch].ptr, inp+1, inbytesleft-1, no_read, combining);
55 *combining = t->dir[ch].combining;
62 struct yaz_iconv_trie_flat *flat = t->flat;
65 size_t len = strlen(flat->from);
66 if (len <= inbytesleft)
68 if (memcmp(flat->from, inp, len) == 0)
71 *combining = flat->combining;
86 foreach x [array names trie] {
97 proc ins_trie {from to combining codename} {
99 if {![info exists trie(no)]} {
104 if {$trie(max) < $to} {
108 ins_trie_r [split $from] $to $combining $codename 0
111 proc split_trie {this} {
113 set trie($this,type) d
114 foreach e $trie($this,content) {
115 set from [lindex $e 0]
117 set combining [lindex $e 2]
118 set codename [lindex $e 3]
120 set ch [lindex $from 0]
121 set rest [lrange $from 1 end]
123 if {[llength $rest]} {
124 if {![info exist trie($this,ptr,$ch)]} {
125 set trie($this,ptr,$ch) $trie(no)
128 ins_trie_r $rest $to $combining $codename $trie($this,ptr,$ch)
130 set trie($this,to,$ch) $to
131 set trie($this,combining,$ch) $combining
132 set trie($this,codename,$ch) $codename
135 set trie($this,content) missing
138 proc ins_trie_r {from to combining codename this} {
141 if {![info exist trie($this,type)]} {
142 set trie($this,type) f
144 if {$trie($this,type) == "f"} {
145 lappend trie($this,content) [list $from $to $combining $codename]
148 if {[llength $trie($this,content)] > $trie(split)} {
150 return [ins_trie_r $from $to $combining $codename $this]
153 set ch [lindex $from 0]
154 set rest [lrange $from 1 end]
156 if {[llength $rest]} {
157 if {![info exist trie($this,ptr,$ch)]} {
158 set trie($this,ptr,$ch) $trie(no)
161 ins_trie_r $rest $to $combining $codename $trie($this,ptr,$ch)
163 set trie($this,to,$ch) $to
164 set trie($this,combining,$ch) $combining
165 set trie($this,codename,$ch) $codename
170 proc dump_trie {ofilehandle} {
175 puts $f "/* TRIE: size $trie(size) */"
178 while { [incr this -1] >= 0 } {
179 puts $f "/* PAGE $this */"
180 if {$trie($this,type) == "f"} {
181 puts $f "struct yaz_iconv_trie_flat $trie(prefix)page${this}_flat\[\] = \{"
182 foreach m $trie($this,content) {
183 puts -nonewline $f " \{\""
184 foreach d [lindex $m 0] {
185 puts -nonewline $f "\\x$d"
187 puts -nonewline $f "\", [lindex $m 2], 0x[lindex $m 1]"
189 puts $f "\}, /* $v */"
191 puts $f " \{\"\", 0\}"
193 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
194 puts $f " $trie(prefix)page${this}_flat, 0"
197 puts $f "struct yaz_iconv_trie_dir $trie(prefix)page${this}_dir\[256\] = \{"
198 for {set i 0} {$i < 256} {incr i} {
199 puts -nonewline $f " \{"
200 set ch [format %02X $i]
202 if {[info exist trie($this,ptr,$ch)]} {
203 puts -nonewline $f "[expr $trie($this,ptr,$ch)+1], "
206 puts -nonewline $f "0, "
208 if {[info exist trie($this,combining,$ch)]} {
209 puts -nonewline $f "$trie($this,combining,$ch), "
211 puts -nonewline $f "0, "
213 if {[info exist trie($this,to,$ch)]} {
214 puts -nonewline $f "0x$trie($this,to,$ch)\}"
217 puts -nonewline $f "0\}"
219 if {[info exist trie($this,codename,$ch)]} {
220 set v $trie($this,codename,$ch)
221 puts -nonewline $f " /* $v */"
230 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
231 puts $f " 0, $trie(prefix)page${this}_dir"
236 puts $f "struct yaz_iconv_trie *$trie(prefix)ptrs \[\] = {"
237 for {set this 0} {$this < $trie(no)} {incr this} {
238 puts $f " &$trie(prefix)page$this,"
243 puts $f "unsigned long yaz_$trie(prefix)_conv
244 (unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining)
248 code = lookup($trie(prefix)ptrs, 1, inp, inbytesleft, no_read, combining);
258 proc readfile {fname ofilehandle prefix omits reverse} {
266 set f [open $fname r]
272 set cnt [gets $f line]
276 if {[regexp {<entitymap>} $line s]} {
278 set trie(prefix) "${prefix}"
279 } elseif {[regexp {</entitymap>} $line s]} {
280 dump_trie $ofilehandle
281 } elseif {[regexp {<character hex="([^\"]*)".*<unientity>([0-9A-Fa-f]*)</unientity>} $line s hex ucs]} {
282 ins_trie $hex $ucs $combining {}
284 } elseif {[regexp {<codeTable .*number="([0-9]+)"} $line s tablenumber]} {
286 set trie(prefix) "${prefix}_$tablenumber"
288 } elseif {[regexp {</codeTable>} $line s]} {
289 if {[lsearch $omits $tablenumber] == -1} {
290 dump_trie $ofilehandle
292 } elseif {[regexp {</code>} $line s]} {
293 if {[string length $ucs]} {
295 for {set i 0} {$i < [string length $utf]} {incr i 2} {
296 lappend hex [string range $utf $i [expr $i+1]]
298 # puts "ins_trie $hex $marc
299 ins_trie $hex $marc $combining $codename
302 for {set i 0} {$i < [string length $marc]} {incr i 2} {
303 lappend hex [string range $marc $i [expr $i+1]]
305 # puts "ins_trie $hex $ucs"
306 ins_trie $hex $ucs $combining $codename
314 } elseif {[regexp {<marc>([0-9A-Fa-f]*)</marc>} $line s marc]} {
316 } elseif {[regexp {<name>(.*)</name>} $line s codename]} {
318 } elseif {[regexp {<name>(.*)} $line s codename]} {
321 set cnt [gets $f line]
325 if {[regexp {(.*)</name>} $line s codename_ex]} {
326 set codename "${codename} ${codename_ex}"
328 } elseif {[regexp {<isCombining>true</isCombining>} $line s]} {
330 } elseif {[regexp {<ucs>([0-9A-Fa-f]*)</ucs>} $line s ucs]} {
332 } elseif {[regexp {<utf-8>([0-9A-Fa-f]*)</utf-8>} $line s utf]} {
345 set l [llength $argv]
349 set arg [lindex $argv $i]
350 switch -glob -- $arg {
355 if {[string length $arg]} {
356 set arg [lindex $argv [incr i]]
361 if {[string length $arg]} {
362 set arg [lindex $argv [incr i]]
367 if {[string length $arg]} {
368 set arg [lindex $argv [incr i]]
373 if {[string length $arg]} {
374 set arg [lindex $argv [incr i]]
387 if {![info exists ifiles]} {
388 puts "charconv.tcl: missing input file(s)"
392 set ofilehandle [open $ofile w]
393 preamble_trie $ofilehandle
395 foreach ifile $ifiles {
396 readfile $ifile $ofilehandle $prefix $omits $reverse_map