2 # the next line restats using tclsh \
5 # $Id: charconv.tcl,v 1.13 2006-04-20 20:50:51 adam Exp $
8 puts {charconv.tcl: [-p prefix] [-s split] [-o ofile] file ... }
12 proc preamble_trie {ofilehandle ifiles ofile} {
15 set totype {unsigned }
17 puts $f "/** \\file $ofile"
18 puts $f " \\brief Character conversion, generated from [lindex $ifiles 0]"
20 puts $f " Generated automatically by charconv.tcl"
22 puts $f "\#include <string.h>"
24 struct yaz_iconv_trie_flat {
26 unsigned combining : 1;
29 struct yaz_iconv_trie_dir {
31 unsigned combining : 1;
35 struct yaz_iconv_trie {
36 struct yaz_iconv_trie_flat *flat;
37 struct yaz_iconv_trie_dir *dir;
41 static unsigned long lookup(struct yaz_iconv_trie **ptrs, int ptr, unsigned char *inp,
42 size_t inbytesleft, size_t *no_read, int *combining)
44 struct yaz_iconv_trie *t = (ptr > 0) ? ptrs[ptr-1] : 0;
45 if (!t || inbytesleft < 1)
49 size_t ch = inp[0] & 0xff;
51 lookup(ptrs, t->dir[ch].ptr, inp+1, inbytesleft-1, no_read, combining);
60 *combining = t->dir[ch].combining;
67 struct yaz_iconv_trie_flat *flat = t->flat;
70 size_t len = strlen(flat->from);
71 if (len <= inbytesleft)
73 if (memcmp(flat->from, inp, len) == 0)
76 *combining = flat->combining;
91 foreach x [array names trie] {
102 proc ins_trie {from to combining codename} {
104 if {![info exists trie(no)]} {
109 if {$trie(max) < $to} {
113 ins_trie_r [split $from] $to $combining $codename 0
116 proc split_trie {this} {
118 set trie($this,type) d
119 foreach e $trie($this,content) {
120 set from [lindex $e 0]
122 set combining [lindex $e 2]
123 set codename [lindex $e 3]
125 set ch [lindex $from 0]
126 set rest [lrange $from 1 end]
128 if {[llength $rest]} {
129 if {![info exist trie($this,ptr,$ch)]} {
130 set trie($this,ptr,$ch) $trie(no)
133 ins_trie_r $rest $to $combining $codename $trie($this,ptr,$ch)
135 set trie($this,to,$ch) $to
136 set trie($this,combining,$ch) $combining
137 set trie($this,codename,$ch) $codename
140 set trie($this,content) missing
143 proc ins_trie_r {from to combining codename this} {
146 if {![info exist trie($this,type)]} {
147 set trie($this,type) f
149 if {$trie($this,type) == "f"} {
150 lappend trie($this,content) [list $from $to $combining $codename]
153 if {[llength $trie($this,content)] > $trie(split)} {
155 return [ins_trie_r $from $to $combining $codename $this]
158 set ch [lindex $from 0]
159 set rest [lrange $from 1 end]
161 if {[llength $rest]} {
162 if {![info exist trie($this,ptr,$ch)]} {
163 set trie($this,ptr,$ch) $trie(no)
166 ins_trie_r $rest $to $combining $codename $trie($this,ptr,$ch)
168 set trie($this,to,$ch) $to
169 set trie($this,combining,$ch) $combining
170 set trie($this,codename,$ch) $codename
175 proc dump_trie {ofilehandle} {
180 puts $f "/* TRIE: size $trie(size) */"
183 while { [incr this -1] >= 0 } {
184 puts $f "/* PAGE $this */"
185 if {$trie($this,type) == "f"} {
186 puts $f "struct yaz_iconv_trie_flat $trie(prefix)page${this}_flat\[\] = \{"
187 foreach m $trie($this,content) {
188 puts -nonewline $f " \{\""
189 foreach d [lindex $m 0] {
190 puts -nonewline $f "\\x$d"
192 puts -nonewline $f "\", [lindex $m 2], 0x[lindex $m 1]"
194 puts $f "\}, /* $v */"
196 puts $f " \{\"\", 0\}"
198 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
199 puts $f " $trie(prefix)page${this}_flat, 0"
202 puts $f "struct yaz_iconv_trie_dir $trie(prefix)page${this}_dir\[256\] = \{"
203 for {set i 0} {$i < 256} {incr i} {
204 puts -nonewline $f " \{"
205 set ch [format %02X $i]
207 if {[info exist trie($this,ptr,$ch)]} {
208 puts -nonewline $f "[expr $trie($this,ptr,$ch)+1], "
211 puts -nonewline $f "0, "
213 if {[info exist trie($this,combining,$ch)]} {
214 puts -nonewline $f "$trie($this,combining,$ch), "
216 puts -nonewline $f "0, "
218 if {[info exist trie($this,to,$ch)]} {
219 puts -nonewline $f "0x$trie($this,to,$ch)\}"
222 puts -nonewline $f "0\}"
224 if {[info exist trie($this,codename,$ch)]} {
225 set v $trie($this,codename,$ch)
226 puts -nonewline $f " /* $v */"
235 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
236 puts $f " 0, $trie(prefix)page${this}_dir"
241 puts $f "struct yaz_iconv_trie *$trie(prefix)ptrs \[\] = {"
242 for {set this 0} {$this < $trie(no)} {incr this} {
243 puts $f " &$trie(prefix)page$this,"
248 puts $f "unsigned long yaz_$trie(prefix)_conv
249 (unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining)
253 code = lookup($trie(prefix)ptrs, 1, inp, inbytesleft, no_read, combining);
263 proc readfile {fname ofilehandle prefix omits reverse} {
271 set f [open $fname r]
277 set cnt [gets $f line]
281 if {[regexp {<entitymap>} $line s]} {
283 set trie(prefix) "${prefix}"
284 } elseif {[regexp {</entitymap>} $line s]} {
285 dump_trie $ofilehandle
286 } elseif {[regexp {<character hex="([^\"]*)".*<unientity>([0-9A-Fa-f]*)</unientity>} $line s hex ucs]} {
287 ins_trie $hex $ucs $combining {}
289 } elseif {[regexp {<codeTable .*number="([0-9]+)"} $line s tablenumber]} {
291 set trie(prefix) "${prefix}_$tablenumber"
293 } elseif {[regexp {</codeTable>} $line s]} {
294 if {[lsearch $omits $tablenumber] == -1} {
295 dump_trie $ofilehandle
297 } elseif {[regexp {</code>} $line s]} {
298 if {[string length $ucs]} {
300 for {set i 0} {$i < [string length $utf]} {incr i 2} {
301 lappend hex [string range $utf $i [expr $i+1]]
303 # puts "ins_trie $hex $marc
304 ins_trie $hex $marc $combining $codename
307 for {set i 0} {$i < [string length $marc]} {incr i 2} {
308 lappend hex [string range $marc $i [expr $i+1]]
310 # puts "ins_trie $hex $ucs"
311 ins_trie $hex $ucs $combining $codename
319 } elseif {[regexp {<marc>([0-9A-Fa-f]*)</marc>} $line s marc]} {
321 } elseif {[regexp {<name>(.*)</name>} $line s codename]} {
323 } elseif {[regexp {<name>(.*)} $line s codename]} {
326 set cnt [gets $f line]
330 if {[regexp {(.*)</name>} $line s codename_ex]} {
331 set codename "${codename} ${codename_ex}"
333 } elseif {[regexp {<isCombining>true</isCombining>} $line s]} {
335 } elseif {[regexp {<ucs>([0-9A-Fa-f]*)</ucs>} $line s ucs]} {
337 } elseif {[regexp {<utf-8>([0-9A-Fa-f]*)</utf-8>} $line s utf]} {
350 set l [llength $argv]
354 set arg [lindex $argv $i]
355 switch -glob -- $arg {
360 if {[string length $arg]} {
361 set arg [lindex $argv [incr i]]
366 if {[string length $arg]} {
367 set arg [lindex $argv [incr i]]
372 if {[string length $arg]} {
373 set arg [lindex $argv [incr i]]
378 if {[string length $arg]} {
379 set arg [lindex $argv [incr i]]
392 if {![info exists ifiles]} {
393 puts "charconv.tcl: missing input file(s)"
397 set ofilehandle [open $ofile w]
398 preamble_trie $ofilehandle $ifiles $ofile
400 foreach ifile $ifiles {
401 readfile $ifile $ofilehandle $prefix $omits $reverse_map