# The i18n package: data

if 0 {Richard Suchenwirth 2003-06-18 - The i18n package bundles most of the i18n work (and play) that I've done so far. That page contains both the man page and the code, but the gory detail data that are so necessary for i18n are on this page, reachable over the i18n::data interface. Save this page as i18n_data.tcl to the same directory where the i18n.tcl code sits, and you should be set...}

# Arabic: Buckwalter transliteration to abstract (context-free) Arabic glyphs.
#
# Registers a flat list of key / codepoint pairs under the name "arblish".
# Keys are single ASCII characters; targets lie in U+0621..U+0671 (plus
# U+06A4 for C).  The \u escapes sit inside braces, so the Tcl parser passes
# them through literally -- presumably i18n::data or its consumer expands
# them; confirm in i18n.tcl.  The brace keys are written as backslash-brace
# so that they do not disturb brace matching of the enclosing word.
#
# The last row aliases c and V to the same codepoints as the $ and * keys.

 i18n::data arblish {
         ' \u0621 | \u0622 > \u0623 & \u0624  < \u0625 \} \u0626
        A \u0627 b \u0628 p \u0629 t \u062A  v \u062B j \u062C H \u062D 
        x \u062E d \u062F * \u0630 r \u0631 z \u0632  s \u0633 $ \u0634 
        S \u0635 D \u0636 T \u0637 Z \u0638 E \u0639 g \u063A  _ \u0640 
        f \u0641 C \u06A4 q \u0642 k \u0643 l \u0644 m \u0645 n \u0646 h \u0647  
        w \u0648 Y \u0649 y \u064A F \u064B N \u064C K \u064D a \u064e  
        u \u064F i \u0650 ~ \u0651 o \u0652 ` \u0670 \{ \u0671
        c \u0634 V \u0630
    } ;# allow modified Buckwalter: c, V, to avoid special chars

# Arabic: context mappings, both sides connected.
#
# Registers a flat list of pattern / replacement pairs under the name
# "ar_join2".  A pattern is a base letter (U+0622..U+064A) with an optional
# ",," marker on either side; replacements are codepoints in the
# U+FE81..U+FEFC presentation-form range.  For each letter the variants are
# listed from most to least specific:  ,,X,,   X,,   ,,X   X
# Letters such as U+0627 or U+062F only have the  X,,  and bare variants.
# The rows containing U+0644 map a two-letter sequence (U+0644 together with
# U+0622, U+0623, U+0625 or U+0627) to a single codepoint.
#
# NOTE(review): ",," presumably marks a connection to the neighbouring
# letter and is inserted by the rendering code in i18n.tcl -- confirm there.
# NOTE(review): the plain  X,,  replacements for U+0622..U+0625 drop the
# trailing ",," whereas those for U+0627 and later keep it -- confirm that
# this difference is intended.

 i18n::data ar_join2 {
        \u0622,,,,\u0644,, \uFEF6,,  \u0622,,,,\u0644 \uFEF5
        \u0622,, \uFE82 \u0622 \uFE81
        \u0623,,,,\u0644,, \uFEF8,,  \u0623,,,,\u0644 \uFEF7
        \u0623,, \uFE84 \u0623 \uFE83    \u0624,, \uFE86 \u0624 \uFE85
        \u0625,,,,\u0644,, \uFEFA,,  \u0625,,,,\u0644 \uFEF9
        \u0625,, \uFE88 \u0625 \uFE87
        ,,\u0626,, ,,\uFE8C,, \u0626,, \uFE8A,, ,,\u0626 ,,\uFE8B \u0626 \uFE89 
        \u0627,,,,\u0644,, \uFEFC,,  \u0627,,,,\u0644 \uFEFB
        \u0627,, \uFE8E,, \u0627 \uFE8D
        ,,\u0628,, ,,\uFE92,, \u0628,, \uFE90,, ,,\u0628 ,,\uFE91 \u0628 \uFE8F 
        \u0629,, \uFE94,, \u0629 \uFE93
        ,,\u062A,, ,,\uFE98,, \u062A,, \uFE96,, ,,\u062A ,,\uFE97 \u062A \uFE95 
        ,,\u062B,, ,,\uFE9C,, \u062B,, \uFE9A,, ,,\u062B ,,\uFE9B \u062B \uFE99 
        ,,\u062C,, ,,\uFEA0,, \u062C,, \uFE9E,, ,,\u062C ,,\uFE9F \u062C \uFE9D 
        ,,\u062D,, ,,\uFEA4,, \u062D,, \uFEA2,, ,,\u062D ,,\uFEA3 \u062D \uFEA1 
        ,,\u062E,, ,,\uFEA8,, \u062E,, \uFEA6,, ,,\u062E ,,\uFEA7 \u062E \uFEA5
        \u062F,, \uFEAA,, \u062F \uFEA9   \u0630,, \uFEAC,, \u0630 \uFEAB
        \u0631,, \uFEAE,, \u0631 \uFEAD   \u0632,, \uFEB0,, \u0632 \uFEAF
        ,,\u0633,, ,,\uFEB4,, \u0633,, \uFEB2,, ,,\u0633 ,,\uFEB3 \u0633 \uFEB1 
        ,,\u0634,, ,,\uFEB8,, \u0634,, \uFEB6,, ,,\u0634 ,,\uFEB7 \u0634 \uFEB5
        ,,\u0635,, ,,\uFEBC,, \u0635,, \uFEBA,, ,,\u0635 ,,\uFEBB \u0635 \uFEB9 
        ,,\u0636,, ,,\uFEC0,, \u0636,, \uFEBE,, ,,\u0636 ,,\uFEBF \u0636 \uFEBD 
        ,,\u0637,, ,,\uFEC4,, \u0637,, \uFEC2,, ,,\u0637 ,,\uFEC3 \u0637 \uFEC1 
        ,,\u0638,, ,,\uFEC8,, \u0638,, \uFEC6,, ,,\u0638 ,,\uFEC7 \u0638 \uFEC5
        ,,\u0639,, ,,\uFECC,, \u0639,, \uFECA,, ,,\u0639 ,,\uFECB \u0639 \uFEC9 
        ,,\u063A,, ,,\uFED0,, \u063A,, \uFECE,, ,,\u063A ,,\uFECF \u063A \uFECD 
        ,,\u0641,, ,,\uFED4,, \u0641,, \uFED2,, ,,\u0641 ,,\uFED3 \u0641 \uFED1 
        ,,\u0642,, ,,\uFED8,, \u0642,, \uFED6,, ,,\u0642 ,,\uFED7 \u0642 \uFED5
        ,,\u0643,, ,,\uFEDC,, \u0643,, \uFEDA,, ,,\u0643 ,,\uFEDB \u0643 \uFED9 
        ,,\u0644,, ,,\uFEE0,, \u0644,, \uFEDE,, ,,\u0644 ,,\uFEDF \u0644 \uFEDD 
        ,,\u0645,, ,,\uFEE4,, \u0645,, \uFEE2,, ,,\u0645 ,,\uFEE3 \u0645 \uFEE1 
        ,,\u0646,, ,,\uFEE8,, \u0646,, \uFEE6,, ,,\u0646 ,,\uFEE7 \u0646 \uFEE5
        ,,\u0647,, ,,\uFEEC,, \u0647,, \uFEEA,, ,,\u0647 ,,\uFEEB \u0647 \uFEE9 
        \u0648,, \uFEEE,, \u0648 \uFEED   \u0649,, \uFEF0,, \u0649 \uFEEF
        ,,\u064A,, ,,\uFEF4,, \u064A,, \uFEF2,, ,,\u064A ,,\uFEF3 \u064A \uFEF1
 }

# Greek alphabet.
#
# Registers a flat list of Latin-key / codepoint pairs under the name
# "greeklish".  Keys followed by an apostrophe select the accented (tonos)
# vowel and are listed before the plain single-letter keys of the same case,
# so a consumer that matches in list order sees A' before A.
#
# U is Upsilon and Y is Psi in both cases, so capital Upsilon with tonos
# (U+038E) is keyed U', matching the lowercase u'.  The earlier key Y' is
# kept after it as a legacy alias for input written against the old table.

 i18n::data greeklish {
      A' \u0386 E' \u0388 H' \u0389 I' \u038a O' \u038c U' \u038e Y' \u038e
      W' \u038f
      A \u0391 B \u0392 G \u0393 D \u0394 E \u0395 Z \u0396
      H \u0397 Q \u0398 I \u0399 K \u039A L \u039b M \u039c
      N \u039d J \u039e O \u039f P \u03a0 R \u03a1 S \u03a3
      T \u03a4 U \u03a5 F \u03a6 X \u03a7 Y \u03a8 W \u03a9
      a' \u03ac e' \u03ad h' \u03ae i' \u03af o' \u03cc u' \u03cd w' \u03ce
      a \u03b1 b \u03b2 g \u03b3 d \u03b4 e \u03b5 z \u03b6
      h \u03b7 q \u03b8 i \u03b9 k \u03bA l \u03bb m \u03bc
      n \u03bd j \u03be o \u03bf p \u03c0 r \u03c1 c \u03c2 s \u03c3
      t \u03c4 u \u03c5 f \u03c6 x \u03c7 y \u03c8 w \u03c9
 }

# Hebrew alphabet (undotted).
#
# Registers a flat list of Latin-key / codepoint pairs under the name
# "heblish"; the targets cover U+05D0..U+05EA.  The upper-case keys K M N F Y
# map to the codepoint immediately preceding that of the matching lower-case
# key (the final letter forms in Unicode).
# The escapes use three hex digits (\u5d0 is the same as \u05d0); Tcl accepts
# one to four digits after \u.

 i18n::data heblish {
    a \u5d0 b \u5d1 g \u5d2 d \u5d3 h \u5d4 w \u5d5 z \u5d6 x \u5d7 u \u5d8
    i \u5d9 K \u5da k \u5db l \u5dc M \u5dd m \u5de N \u5df n \u5e0 o \u5e1
    e \u5e2 F \u5e3 f \u5e4 Y \u5e5 y \u5e6 q \u5e7 r \u5e8 s \u5e9 t \u5ea
 }

# Two-letter language codes defined in ISO 639.
#
# Registers a flat list of code / English-name pairs under the name "iso639".
# Every name is a single word (no embedded blanks), so the table can be read
# as a plain Tcl list.  Several names keep the spelling of the older ISO 639
# listings (e.g. Byelorussian, Laothian).
#
# Indonesian is listed under its current code "id" as well as the withdrawn
# code "in", which is retained so that existing lookups keep working.

 i18n::data iso639 {
  aa Afar ab Abkhazian af Afrikaans am Amharic ar Arabic as Assamese ay Aymara
  az Azerbaijani ba Bashkir be Byelorussian bg Bulgarian bh Bihari bi Bislama
  bn Bengali bo Tibetan br Breton ca Catalan co Corsican cs Czech cy Welsh
  da Danish de German dz Bhutani el Greek en English eo Esperanto es Spanish
  et Estonian eu Basque fi Finnish fj Fiji fo Faroese fr French fy Frisian
  ga Irish gl Galician gn Guarani gu Gujarati ha Hausa hi Hindi hr Croatian
  hu Hungarian hy Armenian ia Interlingua id Indonesian ie Interlingue ik Inupiak
  in Indonesian is Icelandic it Italian he Hebrew ja Japanese jv Javanese
  ka Georgian kk Kazakh
  kl Greenlandic km Cambodian kn Kannada ko Korean ks Kashmiri ku Kurdish ky Kirghiz
  la Latin li Limburgs ln Lingala lo Laothian lt Lithuanian lv Latvian mg Malagasy
  mi Maori mk Macedonian ml Malayalam mn Mongolian mo Moldavian mr Marathi ms Malay
  mt Maltese my Burmese na Nauru ne Nepali nl Dutch no Norwegian oc Occitan om Afan(Oromo)
  or Oriya pa Punjabi pl Polish ps Pashto pt Portuguese qu Quechua rm Rhaeto-Romance
  rn Kirundi ro Romanian ru Russian rw Kinyarwanda sa Sanskrit sd Sindhi sg Sangho
  sh Serbo-Croatian si Singhalese sk Slovak sl Slovenian sm Samoan sn Shona so Somali
  sq Albanian sr Serbian ss Siswati st Sesotho su Sundanese sv Swedish sw Swahili
  ta Tamil te Telugu tg Tajik th Thai ti Tigrinya tk Turkmen tl Tagalog tn Setswana
  to Tonga tr Turkish ts Tsonga tt Tatar tw Twi ug Uighur uk Ukrainian ur Urdu uz Uzbek
  vi Vietnamese vo Volapük wo Wolof xh Xhosa yi Yiddish yo Yoruba zh Chinese zu Zulu
 }

# Japanese Kana (hiragana, KATAKANA).
#
# Registers a flat list of romaji-key / kana pairs under the name "kana".
# Lower-case keys yield hiragana (U+3042..U+3093), upper-case keys yield
# katakana (U+30A2..U+30F3, plus U+30FC for "-").  Within each case the
# three-letter compound keys (cha, kya, ...) come first, then the syllables,
# then the single letters, so a consumer that matches in list order sees the
# longer keys before their prefixes.  The bare keys k, p, t (K, P, T) map to
# the small tsu of the respective script.
#
# CHA, CHU and CHO start with katakana CHI (U+30C1), in line with the other
# upper-case compounds; they previously started with hiragana chi (U+3061).

 i18n::data kana {
      cha \u3061\u3083 chu \u3061\u3085 cho \u3061\u3087 sha \u3057\u3083
      shu \u3057\u3085 sho \u3057\u3087 kya \u304d\u3083 kyu \u304d\u3085
      kyo \u304d\u3087 rya \u308a\u3083 ryu \u308a\u3085 ryo \u308a\u3087
      pya \u3074\u3083 pyu \u3074\u3085 pyo \u3074\u3087
      ka \u304b ga \u304c ki \u304d gi \u304e ku \u304f gu \u3050 ke \u3051
      ge \u3052 ko \u3053 go \u3054 sa \u3055 za \u3056 shi \u3057 ji \u3058
      su \u3059 zu \u305a se \u305b ze \u305c so \u305d zo \u305e
      ta \u305f da \u3060 chi \u3061 di \u3062 tsu \u3064 dsu \u3065
      te \u3066 de \u3067 to \u3068 do \u3069 na \u306a ni \u306b nu \u306c
      ne \u306d no \u306e ha \u306f ba \u3070 pa \u3071 hi \u3072 bi \u3073
      pi \u3074 fu \u3075 bu \u3076 pu \u3077 he \u3078 be \u3079 pe \u307a
      ho \u307b bo \u307c po \u307d ma \u307e mi \u307f mu \u3080 me \u3081
      mo \u3082 ya \u3084 yu \u3086 yo \u3088 ra \u3089 ri \u308a ru \u308b
      re \u308c ro \u308d wa \u308f wo \u3092 n \u3093  a \u3042 i \u3044
      u \u3046 e \u3048 o \u304a  k \u3063 p \u3063 t \u3063
      CHA \u30c1\u30e3 CHU \u30c1\u30e5 CHO \u30c1\u30e7 SHA \u30b7\u30e3
      SHU \u30b7\u30e5 SHO \u30b7\u30e7 KYA \u30ad\u30e3 KYU \u30ad\u30e5
      KYO \u30ad\u30e7 RYA \u30ea\u30e3 RYU \u30ea\u30e5 RYO \u30ea\u30e7
      PYA \u30d4\u30e3 PYU \u30d4\u30e5 PYO \u30d4\u30e7
      KA \u30ab GA \u30ac KI \u30ad GI \u30ae KU \u30af GU  \u30b0 KE \u30b1
      GE \u30b2 KO \u30b3 GO \u30b4 SA \u30b5 ZA \u30b6 SHI \u30b7 JI \u30b8
      SU \u30b9 ZU \u30ba SE \u30bb ZE \u30bc SO \u30bd ZO \u30be
      TA \u30bf DA \u30c0 CHI \u30c1 DI \u30c2 TSU \u30c4 DSU \u30c5
      TE \u30c6 DE \u30c7 TO \u30c8 DO \u30c9 NA \u30ca NI \u30cb NU \u30cc
      NE \u30cd NO \u30ce HA \u30cf BA \u30d0 PA \u30d1 HI \u30d2 BI \u30d3
      PI \u30d4 FU \u30d5 BU \u30d6 PU \u30d7 HE \u30d8 BE \u30d9 PE \u30da
      HO \u30db BO \u30dc PO \u30dd MA \u30de MI \u30df MU \u30e0 ME \u30e1
      MO \u30e2 YA \u30e4 YU \u30e6 YO \u30e8 RA \u30e9 RI \u30ea RU \u30eb
      RE \u30ec RO \u30ed WA \u30ef WO \u30f2 N \u30f3  A \u30a2 I \u30a4
      U \u30a6 E \u30a8 O \u30aa  K \u30c3 P \u30c3 T \u30c3 - \u30fc
 }

# Chinese Pinyin syllables and start points in decimal GB2312-80
# (row/cell notation, e.g. 1601 = row 16, cell 1).
#
# Registers a flat list of syllable / start-point pairs under the name
# "pinyin".  Entries are in strictly ascending order of start point; the
# closing "-" entry (5590) carries no syllable and presumably just bounds the
# range of the last syllable -- confirm against the consumer in i18n.tcl.
#
# Keys must be real Pinyin syllables: luan, zuan, pei and pen replace the
# former lua, zua, pe and (second) pei.  The start points of an, kong, pao
# and nuo follow the GB2312 cell order, and keng and nue are listed so that
# their cells are not attributed to the preceding syllable.
#
# NOTE(review): only the rows touched here were re-checked.  The table still
# has no entries for some syllables (e.g. gai, lei, chuai); verify the
# remaining start points against a GB 2312 code table before relying on them.

 i18n::data pinyin { a 1601 ai 1603 an 1616 ang 1625 ao 1628
 ba 1637 bai 1655 ban 1663 bang 1678 bao 1690 bei 1713 ben 1728 beng 1732
 bi 1738 bian 1762 biao 1774 bie 1778 bin 1782 bing 1787 bo 1803 bu 1822
 ca 1833 cai 1834 can 1844 cang 1852 cao 1857 ce 1862 ceng 1867 cha 1869 chai 1881
 chan 1884 chang 1893 chao 1912 che 1921 chen 1927 cheng 1937 chi 1952 chong 1968 
 chou 1973 chu 1985 chuan 2008 chuang 2015 chui 2021 chun 2026 chuo 2033 ci 2035
 cong 2047 cou 2053 cu 2054 cuan 2058 cui 2061 cun 2069 cuo 2072
 da 2078 dai 2084 dan 2102 dang 2117 dao 2122 de 2134 deng 2137 di 2144 dian 2163
 diao 2179 die 2188 ding 2201 diu 2210 dong 2211 dou 2221 du 2228 duan 2243
 dui 2249 dun 2253 duo 2262 e 2274 en 2287 er 2288
 fa 2302 fan 2310 fang 2327 fei 2338 fen 2350 feng 2365 fo 2380 fou 2381 fu 2382
 ga 2433 gan 2441 gang 2452 gao 2461 ge 2471 gei 2488 gen 2489 geng 2491 gong 2504
 gou 2519 gu 2528 gua 2546 guai 2552 guan 2555 guang 2566 gui 2569 gun 2585
 guo 2588 ha 2594 hai 2601 han 2608 hang 2628 hao 2630 he 2639 hei 2657 hen 2659
 heng 2663 hong 2668 hou 2677 hu 2684 hua 2708 huai 2717 huan 2722 huang 2736
 hui 2750 hun 2771 huo 2778 ji 2787 jia 2846 jian 2863 jiang 2909 jiao 2922
 jie 2950 jin 2977 jing 3005 jiong 3028 jiu 3030 ju 3047 juan 3072 jue 3085 jun 3089
 ka 3106 kai 3110 kan 3115 kang 3121 kao 3128 ke 3132 ken 3147 keng 3151 kong 3153
 kou 3157
 ku 3161 kua 3169 kuai 3173 kuan 3177 kuang 3179 kui 3187 kun 3204 kuo 3208
 la 3212 lai 3219 lan 3221 lang 3237 lao 3244 le 3253 leng 3266 li 3269 lia 3309
 lian 3310 liang 3324 liao 3335 lie 3348 lin 3353 ling 3364 liu 3379
 long 3390 lou 3405 lu 3411 lü 3432 luan 3445 lun 3453 luo 3460
 ma 3472 mai 3481 man 3487 mang 3502 mao 3508 me 3520 mei 3521 men 3537 meng 3540
 mi 3548 mian 3562 miao 3571 mie 3580 min 3581 ming 3587 mo 3594 mou 3617
 mu 3620 na 3635 nai 3641 nan 3647 nao 3652 ne 3656 neng 3660 ni 3661 nian 3672
 niang 3679 niao 3681 nie 3683 nin 3690 ning 3691 niu 3703 nong 3707 nu 3711
 nü 3714 nuan 3715 nue 3716 nuo 3718 o 3722 ou 3723
 pa 3730 pai 3736 pan 3742 pang 3750 pao 3755 pei 3762 pen 3771 peng 3773 pi 3787
 pian 3810 piao 3814 pin 3820 ping 3825 po 3834 pu 3843
 qi 3858 qia 3901 qian 3903 qiang 3925 qiao 3933 qie 3948 qin 3953 qing 3964
 qiong 3978 qiu 3979 qu 3987 quan 4006 que 4017 qun 4025
 ran 4027 rang 4031 rao 4036 re 4039 ren 4041 reng 4051 ri 4053 rong 4056
 rou 4064 ru 4067 ruan 4077 rui 4080 ruo 4084
 sa 4086 sai 4089 san 4093 sang 4103 sao 4106 se 4110 sen 4113 sha 4115 shan 4127
 shang 4142 shao 4150 she 4163 shen 4173 sheng 4189 shi 4206 shou 4253 shu 4263
 shua 4302 shuan 4308 shuang 4310 shui 4313 shun 4318 shuo 4321 si 4325 song 4341
 sou 4349 su 4351 suan 4365 sui 4368 sun 4378 suo 4385
 ta 4390 tai 4405 tan 4414 tang 4432 tao 4445 te 4456 teng 4457 ti 4461 tian 4476
 tiao 4484 tie 4489 ting 4492 tong 4508 tou 4521 tu 4525 tuan 4536 tui 4538
 tun 4544 tuo 4547
 wa 4558 wai 4564 wan 4567 wang 4584 wei 4594 wen 4633 weng 4643 wo 4646 wu 4655
 xi 4684 xia 4725 xian 4738 xiang 4764 xiao 4784 xie 4808 xin 4829
 xing 4839 xiong 4854 xiu 4861 xu 4870 xuan 4889 xue 4905 xun 4911
 ya 4925 yan 4941 yang 4974 yao 4991 ye 5012 yi 5027 yin 5080 ying 5102
 yong 5120 you 5136 yu 5156 yuan 5206 yue 5227 yun 5237
 za 5251 zai 5252 zan 5259 zang 5263 zao 5266 ze 5280 zen 5285 zeng 5286
 zha 5290 zhai 5310 zhan 5319 zhang 5333 zhao 5348 zhe 5358 zhen 5368
 zheng 5384 zhi 5405 zhong 5448 zhou 5459 zhu 5473 zhua 5505 zhuan 5508
 zhuang 5514 zhui 5521  zhun 5528 zhuo 5529 zi 5540 zong 5555 zou 5562
 zu 5566 zuan 5574 zui 5576 zun 5580 zuo 5582 - 5590
 }

# Cyrillic alphabet, Ruslish transliteration.
#
# Registers a flat list of Latin-key / codepoint pairs under the name
# "ruslish": upper case first (targets U+0401, U+0410..U+042F), then lower
# case (U+0430..U+044F, U+0451).  Within each case the two-character keys
# starting with "!" are listed before the single-letter keys, so a consumer
# that matches in list order sees !A before A.

 i18n::data ruslish {
          !A \u042F !C \u0427 !E \u042d !O \u0401 !S \u0428
          !T \u0429 !U \u042e !Z \u0416
          A \u0410 B \u0411 V \u0412 G \u0413 D \u0414 E \u0415
          Z \u0417 I \u0418 J \u0419 K \u041A L \u041b
          M \u041c N \u041d O \u041e P \u041f R \u0420 S \u0421
          T \u0422 U \u0423 F \u0424 X \u0425 C \u0426 
          Q \u042a Y \u042b H \u042c
          !a \u044F !c \u0447 !e \u044d !o \u0451 !s \u0448
          !t \u0449 !u \u044e !z \u0436
          a \u0430 b \u0431 v \u0432 g \u0433 d \u0434 e \u0435
          z \u0437 i \u0438 j \u0439 k \u043a l \u043b
          m \u043c n \u043d o \u043e p \u043f r \u0440 s \u0441
          t \u0442 u \u0443 f \u0444 x \u0445 c \u0446
          q \u044a y \u044b h \u044c 
 }

# Descriptive strings for Unicode ranges.
#
# Registers a flat list of name / range pairs under the name "tell".  Each
# range is a two-element list of code-point bounds (lower bound first),
# written in hexadecimal apart from the leading 0 of "ascii".
# NOTE(review): the bounds are presumably inclusive -- confirm against the
# consumer in i18n.tcl.
# NOTE(review): no range covers the Arabic codepoints used by the arblish
# and ar_join2 tables -- confirm whether that omission is intended.

 i18n::data tell {
    ascii {0 0x7F} latin1 {0x80 0xFF} latin2 {0x100 0x1FF} ipa {0x200 0x2ff}
    greek {0x384 0x3d6} cyrillic {0x0400 0x04FF} hebrew {0x5b0 0x5f4}
    thai {0x0e01 0x0e5b} latin3 {0x1e00 0x1eff}
    hiragana {0x3041 0x309e} katakana {0x30A1 0x30FE}
    cjkIdeograph {0x4E00 0x9FA5}
 }

# Two useful functions for converting between traditional and simplified Chinese characters, fan2jian and jian2fan, should also go into this package.