CEDict Viewer

WJG (24/JUN/11) The Chinese-English Dictionary (CEDict) has been around for some time. It has well in excess of 100,000 entries and is updated on a regular basis. The project provides the basic entries and listings but no specific readers. The following script provides such functionality. At first it will check for the presence of a Metakit version of the dictionary. If one is not present, then the script will automatically download and convert the latest release for use. To force a fresh download, simply delete the file cedict.db! The script relies on the Gnocl package but could easily be adapted to run with Tk.

https://lh4.googleusercontent.com/-iFEOudMWHuc/TgTLj1c0QKI/AAAAAAAAA8M/6-Y2jOuFnDM/s800/Screenshot-CEDict2metakit.tcl.png

#---------------
# CEDict2metakit.tcl
#---------------
#!/bin/sh
#\
exec tclsh "$0" "$@"

package require Gnocl
package require Mk4tcl
package require http

proc progress {token total current} {
        puts -nonewline "."
}

#---------------
# obtain latest version of CEDict
#---------------
proc get_CEDict {} {

        set url "http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.zip"
        #set url "http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz"

        set fp [ open [file tail $url] w]

        set token [http::geturl $url -progress progress -headers {Pragma no-cache} -channel $fp]

        close $fp

        eval exec "unzip -o [file tail $url]"
}

#---------------
# convert ascii pinyin to unicode mapping
#---------------
proc convert_pinyin {str} {

          set pinyin_map {
                a1   ā   a2   á   a3   ǎ   a4   à   a5   a
                ai1  āi  ai2  ái  ai3  ǎi  ai4  ài  ai5  ai
                ao1  āo  ao2  áo  ao3  ǎo  ao4  ào  ao5  ao
                an1  ān  an2  án  an3  ǎn  an4  àn  an5  an
                ang1 āng ang2 áng ang3 ǎng ang4 àng ang5 ang

                e1   ē   e2   é   e3   ě   e4   è   e5   e
                ei1  ēi  ei2  éi  ei3  ěi  ei4  èi  ei5  ei
                en1  ēn  en2  én  en3  ěn  en4  èn  en5  en
                eng1 ēng eng2 éng eng3 ěng eng4 èng eng5 eng

                i1    ī    i2    í    i3    ǐ    i4    ì    i5    i
                ia1   iā   ia2   iá   ia3   iǎ   ia4   ià   ia5   ia
                iao1  iāo  iao2  iáo  iao3  iǎo  iao4  iào  iao5  iao
                iu1   iū   iu2   iú   iu3   iǔ   iu4   iù   iu5   iu
                ian1  iān  ian2  ián  ian3  iǎn  ian4  iàn  ian5  ian
                in1   īn   in2   ín   in3   ǐn   in4   ìn   in5   in
                iang1 iāng iang2 iáng iang3 iǎng iang4 iàng iang5 iang
                ing1  īng  ing2  íng  ing3  ǐng  ing4  ìng  ing5  ing
                iong1 iōng iong2 ióng iong3 iǒng iong4 iòng iong5 iong

                o1   ō   o2   ó   o3   ǒ   o4   ò   o5   o
                ong1 ōng ong2 óng ong3 ǒng ong4 òng ong5 ong
                ou1  ōu  ou2  óu  ou3  ǒu  ou4  òu  ou5  ou

                u1    ū    u2    ú     u3    ǔ     u4   ù    u5    u
                ua1   uā   ua2   uá    ua3   uǎ   ua4   uà   ua5   ua
                uai1  uā   uai2  uá    uai3  uǎ   uai4  uà   uai5  ua
                uo1   uō   uo2   uó    uo3   uǒ   uo4   uò   uo5   uo
                ui1   uī   ui2   uí    ui3   uǐ   ui4   uì   ui5   ui
                uan1  uān  uan2  uán   uan3  uǎn  uan4  uàn  uan5  uan
                uang1 uāng uang2 uáng  uang3 uǎng uang4 uàng uang5 uang
                un1   ūn   un2   ún    un3   ǔn   un4   ùn   un5   un

                u:1  ǖ  u:2  ǘ  u:3  ǚ  u:4  ǜ  u:5 ü
          }
        return [string map $pinyin_map $str]
}


#---------------
# convert CEDict text into Mk database
# Entry structure
#        fanti jianti pinyin meaning
#         䥯 䥯 [ba4] /plow/
# View cel names
#        f[anti]    = indexes 0 ~ first " "
#   j]ianti]   = first " " ~ first [
#        p[inyin]   = first [ to first ]
#   m[eanings] = remainder of the line
#---------------
proc CEDict2metakit { {fname ""} {dbname cedict.db} } {

#        if { $fname == ""} {set fname "cedict_1_0_ts_utf-8_mdbg.txt"}
        if { $fname == ""} {set fname "cedict_ts.u8" }

set fp2 [open text.txt "w"]


        # make a new database each time
        if { [file exist $dbname] } {
                file delete $dbname
                }

        # create the metakit db
        mk::file open db $dbname

        set parts  {f j p m}

        # create a view within the datafile which describes what we’ll store
        set view [mk::view layout db.wordlist $parts]

        # obtain list
        set fp [open $fname r]

        set i 0
        while {[gets $fp entry] >= 0} {

                # process file header
                if {[string index $entry 0] == "#" } {
                        switch [string range $entry 0 1] {
                                "# " {
                                        puts [string trimleft $entry {# }]
                                }
                                "#!" {
                                        puts [string trimleft $entry {#!}]
                                }
                        }
                        incr i
                        continue
                        }

                # process entries, find marker locations
                #   j   k     l m
                #   V   V     V V
                # 䥯  䥯 [ ba4 ] /plow/

                set j [string first " "  $entry]
                set k [string first "\[" $entry]
                set l [string first "\]" $entry]
                set m [string first "/"  $entry]

                set fanti   [string range $entry 0 $j]
                set jianti  [string range $entry $j $k]
                set pinyin  [convert_pinyin [string range $entry $k $l]]
                set meaning [convert_pinyin [string range $entry $m end]]

                # trim away unwanted markers
                set fanti [string trim $fanti]
                set jianti [string trim $jianti " \["]
                set pinyin [string trim $pinyin "\[\]"]
                set meaning [string trim $meaning "/"]


                #if { $i < 250} {
                        #puts "$i >>$fanti<<"
                        #puts "$i >>$jianti<<"
                        #puts "$i >>$pinyin<<"
                        #puts "$i >>$meaning<<"

                        set meaning [string map {/ ", " } $meaning]
                        set meaning [string trim $meaning ", "]

                        set str "f $fanti j $jianti p [list $pinyin] m [list $meaning.]"

                        puts $fp2 $str

                        mk::row append $view $str

                #}

                incr i


        }

        close $fp
close $fp2
        mk::file commit $view
        mk::file close $view

        return $i

}

#---------------
# Script called on search entry activation.
#---------------
proc on_entry_activate {w t {mode 3} } {
        # puts [info level 0 ]

        $::txt clear

        mk::loop i db.wordlist {
                set item [mk::get $i]

                if { [string first $t $item 0] >= 0  } {
                        # parts  {f j p  m }
                        if {$mode == 1 || $mode == 3 } { $::txt insert end "[mk::get $i f]\n" -tags headword }
                        if {$mode == 2 || $mode == 3 } { $::txt insert end "[mk::get $i j]\n" -tags headword }
                        $::txt insert end "[mk::get $i p]\n" -tags pinyin
                        $::txt insert end "[mk::get $i m]\n\n"
                        gnocl::update
                        }
                }
        $::txt search $t -tags searchMatch
}

#---------------
# create dictionary UI
# mode
#        1 = fanti
#        2 = jianti
#        3 = both
#---------------
proc viewChnEngDict { {mode 1} } {
        set box [gnocl::box -orientation vertical]
        set tb [gnocl::toolBar]

        set ent [gnocl::entry -baseFont {Sans 12} ]
        set txt [gnocl::text -editable 0 -wrapMode word]

        set ::txt $txt

        set ::mode $mode

        $::txt tag create headword -font {Sans 14} -paragraph #FAFAFA
        $::txt tag create pinyin -foreground blue
        $::txt tag create keyword -foreground red -fontStyle italic
        $::txt tag create searchMatch -background yellow

        $ent configure \
                -onActivate {
                        [gnocl::winfo toplevel %w ] configure -cursor watch
                        gnocl::update
                        on_entry_activate %w %t $::mode
                        [gnocl::winfo toplevel %w ] configure -cursor last
                        }  \
                -data $txt

        $tb add widget $ent

        $box add $tb -fill {1 1} -expand 0
        $box add $txt -fill {1 1 } -expand 1
        return $box
}

if {[file exists cedict.db] != 1} {
        puts "Obtaining most recent release of CEDict"
        get_CEDict
        puts "Starting Conversion."
        puts "[CEDict2metakit] lines read and converted"
        file delete *.zip
        file delete *u8
}

mk::file open db cedict.db
gnocl::window -child [viewChnEngDict] -width 300 -height 600 -onDelete { exit }

gnocl::mainLoop