[WJG] (24/JUN/11) The Chinese-English Dictionary (http://www.mdbg.net/chindict/chindict.php%|%CEDict%|%) has been around for some time. It has well in excess of 100,000 entries and is updated on a regular basis. The project provides the basic entries and listings but no specific readers. The following script provides such functionality. First it checks for the presence of a Metakit version of the dictionary. If one is not present, the script automatically downloads and converts the latest release for use. To force a fresh download, simply delete the file cedict.db! The script relies on the Gnocl package but could easily be adapted to run with Tk.

[https://lh4.googleusercontent.com/-iFEOudMWHuc/TgTLj1c0QKI/AAAAAAAAA8M/6-Y2jOuFnDM/s800/Screenshot-CEDict2metakit.tcl.png]

======
#---------------
# CEDict2metakit.tcl
#---------------

#!/bin/sh
#\
exec tclsh "$0" "$@"

package require Gnocl
package require Mk4tcl
package require http

#---------------
# http::geturl -progress callback: print one dot per invocation.
#   token   - http transaction token (unused)
#   total   - expected size as reported by the http package (unused)
#   current - amount transferred so far (unused)
#---------------
proc progress {token total current} {
    puts -nonewline "."
    # no newline is written, so flush to make the dot appear immediately
    flush stdout
}

#---------------
# obtain latest version of CEDict
#
# Downloads the zipped dictionary into the current directory (file name
# taken from the URL tail) and unpacks it with the external "unzip"
# program. Returns whatever "unzip" printed.
# Raises an error if the transfer fails, if the server does not answer
# with HTTP 200, or if "unzip" fails.
#---------------
proc get_CEDict {} {

    set url "http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.zip"
    #set url "http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz"
    set archive [file tail $url]

    set fp [open $archive w]
    # a zip archive is binary data: no end-of-line or encoding translation
    fconfigure $fp -translation binary

    # make sure the channel is closed whether or not the transfer succeeds
    set failed [catch {
        http::geturl $url -progress progress -headers {Pragma no-cache} -channel $fp
    } token]
    close $fp
    if {$failed} {
        error "download of $url failed: $token"
    }

    set status [http::status $token]
    set code   [http::ncode $token]
    # release the state array held by the http package
    http::cleanup $token
    if {$status ne "ok" || $code != 200} {
        error "download of $url failed (status: $status, HTTP code: $code)"
    }

    # pass the file name as a separate word instead of re-parsing a string
    return [exec unzip -o $archive]
}

#---------------
# convert ascii pinyin to unicode mapping
#
# Replaces numbered pinyin finals (e.g. "hao3", "lu:4") in str with their
# tone-marked equivalents and returns the result. Tone 5 (neutral) just
# drops the digit. The substitution is a plain, case-sensitive
# [string map], so finals starting with an upper-case vowel ("An1") are
# left untouched.
#---------------
proc convert_pinyin {str} {

    set pinyin_map {
        a1 ā a2 á a3 ǎ a4 à a5 a
        ai1 āi ai2 ái ai3 ǎi ai4 ài ai5 ai
        ao1 āo ao2 áo ao3 ǎo ao4 ào ao5 ao
        an1 ān an2 án an3 ǎn an4 àn an5 an
        ang1 āng ang2 áng ang3 ǎng ang4 àng ang5 ang
        e1 ē e2 é e3 ě e4 è e5 e
        ei1 ēi ei2 éi ei3 ěi ei4 èi ei5 ei
        en1 ēn en2 én en3 ěn en4 èn en5 en
        eng1 ēng eng2 éng eng3 ěng eng4 èng eng5 eng
        er1 ēr er2 ér er3 ěr er4 èr er5 er
        i1 ī i2 í i3 ǐ i4 ì i5 i
        ia1 iā ia2 iá ia3 iǎ ia4 ià ia5 ia
        iao1 iāo iao2 iáo iao3 iǎo iao4 iào iao5 iao
        iu1 iū iu2 iú iu3 iǔ iu4 iù iu5 iu
        ian1 iān ian2 ián ian3 iǎn ian4 iàn ian5 ian
        in1 īn in2 ín in3 ǐn in4 ìn in5 in
        iang1 iāng iang2 iáng iang3 iǎng iang4 iàng iang5 iang
        ing1 īng ing2 íng ing3 ǐng ing4 ìng ing5 ing
        iong1 iōng iong2 ióng iong3 iǒng iong4 iòng iong5 iong
        o1 ō o2 ó o3 ǒ o4 ò o5 o
        ong1 ōng ong2 óng ong3 ǒng ong4 òng ong5 ong
        ou1 ōu ou2 óu ou3 ǒu ou4 òu ou5 ou
        u1 ū u2 ú u3 ǔ u4 ù u5 u
        ua1 uā ua2 uá ua3 uǎ ua4 uà ua5 ua
        uai1 uāi uai2 uái uai3 uǎi uai4 uài uai5 uai
        uo1 uō uo2 uó uo3 uǒ uo4 uò uo5 uo
        ui1 uī ui2 uí ui3 uǐ ui4 uì ui5 ui
        uan1 uān uan2 uán uan3 uǎn uan4 uàn uan5 uan
        uang1 uāng uang2 uáng uang3 uǎng uang4 uàng uang5 uang
        un1 ūn un2 ún un3 ǔn un4 ùn un5 un
        u:1 ǖ u:2 ǘ u:3 ǚ u:4 ǜ u:5 ü
        u:e1 üē u:e2 üé u:e3 üě u:e4 üè u:e5 üe
        r5 r
    }

    # every key ends in its tone digit, so no key is a prefix of another
    # and the order of the entries does not affect the result
    return [string map $pinyin_map $str]
}

#---------------
# convert CEDict text into Mk database
# Entry structure
#   fanti jianti pinyin meaning
#   䥯 䥯 [ba4] /plow/
# View cell names
#   f[anti]    = indexes 0 ~ first " "
#   j[ianti]   = first " " ~ first [
#   p[inyin]   = first [ to first ]
#   m[eanings] = remainder of the line
#---------------
# Arguments
#   fname  - CEDict text file to read; "" selects "cedict_ts.u8"
#   dbname - Metakit datafile to (re)create, default "cedict.db"
# Returns
#   number of lines read from fname (header, skipped and converted lines)
# Side effects
#   deletes and rebuilds dbname, echoes the file header to stdout and
#   writes a plain-text copy of every stored row to "text.txt"
#---------------
proc CEDict2metakit { {fname ""} {dbname cedict.db} } {

    # if { $fname == ""} {set fname "cedict_1_0_ts_utf-8_mdbg.txt"}
    if { $fname == "" } { set fname "cedict_ts.u8" }

    # plain-text dump of the stored rows, kept as a debugging aid
    set fp2 [open text.txt "w"]

    # make a new database each time
    if { [file exists $dbname] } { file delete $dbname }

    # create the metakit db
    mk::file open db $dbname

    set parts {f j p m}

    # create a view within the datafile which describes what we'll store
    set view [mk::view layout db.wordlist $parts]

    # obtain list
    set fp [open $fname r]
    # the CEDict release is UTF-8 text; do not rely on the system encoding
    fconfigure $fp -encoding utf-8

    set i 0
    while {[gets $fp entry] >= 0} {

        # process file header
        if {[string index $entry 0] == "#"} {
            switch -- [string range $entry 0 1] {
                "# " { puts [string trimleft $entry {# }] }
                "#!" { puts [string trimleft $entry {#!}] }
            }
            incr i
            continue
        }

        # process entries, find marker locations
        #   j  k     l m
        #   V  V     V V
        # 䥯 䥯 [ ba4 ] /plow/
        # each marker is searched for after the previous one so that a
        # stray character earlier in the line cannot confuse the split
        set j [string first " " $entry]
        set k [string first "\[" $entry]
        set l [expr {$k < 0 ? -1 : [string first "\]" $entry $k]}]
        set m [expr {$l < 0 ? -1 : [string first "/" $entry $l]}]

        # blank or malformed line: count it as read but store nothing
        if {$j < 0 || $k < $j || $l < 0 || $m < 0} {
            incr i
            continue
        }

        # cut out the four fields and trim away unwanted markers
        set fanti   [string trim [string range $entry 0 $j]]
        set jianti  [string trim [string range $entry $j $k] " \["]
        set pinyin  [string trim [convert_pinyin [string range $entry $k $l]] "\[\]"]
        # NOTE(review): the whole definition text is run through
        # convert_pinyin, so an English token that looks like numbered
        # pinyin is rewritten too - confirm this is intended
        set meaning [string trim [convert_pinyin [string range $entry $m end]] "/"]

        # "/a/b/c/" becomes "a, b, c"
        set meaning [string map {/ ", "} $meaning]
        set meaning [string trim $meaning ", "]

        # build the field/value pairs as a proper list so that unusual
        # characters in a headword cannot break the row apart
        set row [list f $fanti j $jianti p $pinyin m "$meaning."]
        puts $fp2 $row

        # documented form: mk::row append path field value ...
        eval [linsert $row 0 mk::row append $view]

        incr i
    }

    close $fp
    close $fp2

    # commit and close take the datafile tag, not a view path
    mk::file commit db
    mk::file close db

    return $i
}

#---------------
# Script called on search entry activation.
#---------------
# Arguments
#   w    - widget that triggered the search (unused here)
#   t    - text to look for
#   mode - 1 = show fanti, 2 = show jianti, 3 = show both
# Clears the global text widget $::txt, lists every row of db.wordlist
# whose contents contain t, then highlights the matches.
#---------------
proc on_entry_activate {w t {mode 3} } {

    # puts [info level 0 ]
    $::txt clear

    mk::loop i db.wordlist {
        # the complete row (field names and values) as one string
        set item [mk::get $i]

        if { [string first $t $item 0] >= 0 } {
            # parts {f j p m }
            if {$mode == 1 || $mode == 3 } {
                $::txt insert end "[mk::get $i f]\n" -tags headword
            }
            if {$mode == 2 || $mode == 3 } {
                $::txt insert end "[mk::get $i j]\n" -tags headword
            }
            $::txt insert end "[mk::get $i p]\n" -tags pinyin
            $::txt insert end "[mk::get $i m]\n\n"
            # let the GUI redraw while the scan is still running
            gnocl::update
        }
    }

    $::txt search $t -tags searchMatch
}

#---------------
# create dictionary UI
# mode
#   1 = fanti
#   2 = jianti
#   3 = both
#
# Returns the box widget holding the search toolbar and the result text.
# Stores the text widget in ::txt and the display mode in ::mode for use
# by on_entry_activate.
#---------------
proc viewChnEngDict { {mode 1} } {

    set box [gnocl::box -orientation vertical]
    set tb [gnocl::toolBar]
    set ent [gnocl::entry -baseFont {Sans 12} ]
    set txt [gnocl::text -editable 0 -wrapMode word]

    set ::txt $txt
    set ::mode $mode

    $::txt tag create headword -font {Sans 14} -paragraph #FAFAFA
    $::txt tag create pinyin -foreground blue
    $::txt tag create keyword -foreground red -fontStyle italic
    $::txt tag create searchMatch -background yellow

    # show a busy cursor on the toplevel while the search runs
    $ent configure \
        -onActivate {
            [gnocl::winfo toplevel %w ] configure -cursor watch
            gnocl::update
            on_entry_activate %w %t $::mode
            [gnocl::winfo toplevel %w ] configure -cursor last
        } \
        -data $txt

    $tb add widget $ent

    $box add $tb -fill {1 1} -expand 0
    $box add $txt -fill {1 1 } -expand 1

    return $box
}

# build the Metakit datafile on first use
if {[file exists cedict.db] != 1} {
    puts "Obtaining most recent release of CEDict"
    get_CEDict
    puts "Starting Conversion."
    puts "[CEDict2metakit] lines read and converted"

    # "file delete" does not expand wildcards, so the patterns have to go
    # through glob; they are limited to the CEDict download and its
    # extracted text so that unrelated archives are left alone
    foreach leftover [glob -nocomplain cedict*.zip cedict*u8] {
        file delete $leftover
    }
}

mk::file open db cedict.db

gnocl::window -child [viewChnEngDict] -width 300 -height 600 -onDelete { exit }

gnocl::mainLoop
======
<> Human Language