[go: up one dir, main page]

File: mbwcweight.pl

package info (click to toggle)
libmoe 1.5.2-1
  • links: PTS
  • area: main
  • in suites: woody
  • size: 6,772 kB
  • ctags: 267,598
  • sloc: ansic: 478,484; perl: 2,308; makefile: 199; sh: 22
file content (176 lines) | stat: -rw-r--r-- 4,415 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
BEGIN {require 'mbcesdefs.pl';}

# File names used by this script.  All three are package variables, so a
# value assigned before this point is kept; the names below are only
# fallbacks for variables that are still undefined.
#   $cescf_h - only named in an #include line of the weight table
#   $cf_h    - language configuration header, written through handle C
#   $wcw_h   - weight table, written through handle W
$cescf_h = 'mbcesconf.h' unless defined($cescf_h);
$cf_h = 'mblangconf.h' unless defined($cf_h);
$wcw_h = 'mbwcweight.mk_btri.h' unless defined($wcw_h);

# Localize both globs so the handles opened below do not outlive this file's
# dynamic scope.
local *C;
local *W;

open(C, ">$cf_h") or die "open(C, \">$cf_h\"): $!";
open(W, ">$wcw_h") or die "open(W, \">$wcw_h\"): $!";

# Returns the numeric values of every two-digit hex field found in a spec
# string such as "B5C4" or "A4A1-A4F3-04"; separators are skipped.
my $hex_fields = sub {
  my ($spec) = @_;

  return map {hex($_)} ($spec =~ /([0-9A-Fa-f]{2})/g);
};

# Returns the word code of the character at ($row, $cell) in the 94x94 set
# selected by $set.  Each byte is masked to 7 bits and rebased from 0x21, so
# the EUC form (high bit set) is accepted.
my $word_94x94 = sub {
  my ($set, $row, $cell) = @_;
  my $index = (($row & 0x7F) - 0x21) * 94 + ($cell & 0x7F) - 0x21;
  my $wc = MB_WORD_94x94_ENC($set, $index);

  return $wc;
};

# Weight table source.  Layout:
#   @mb_lang = ([LANG_ID, CES_TABLE, ...], ...)
#   CES_TABLE = [CES_ID, first, last, weight, first, last, weight, ...]
# where first..last is an inclusive range of word codes in that CES.
my @mb_lang =
  (

   ['CN',

    ['UTF8',
     (map {
       my $ucs = $_;

       ($ucs, $ucs, 10);
     } (0x7684, 0x662F, 0x5728, 0x4E5F)),
     ],

    ['GB2312',
     (map {
       my $wc = $word_94x94->(0x41, $hex_fields->($_));

       ($wc, $wc, 10);
     } qw(B5C4 CAC7 D2B2 D4DA)),
     ],

    ['EUC_TW',
     (map {
       my $wc = $word_94x94->(0x47, $hex_fields->($_));

       ($wc, $wc, 10);
     } qw(D1D2 CEFB C7E3 C4BE)),
     ],

    ['BIG5',
     (map {
       my $wc = MB_BIG5_ENC($hex_fields->($_));

       ($wc, $wc, 10);
     } qw(AABA AC4F A662 A45D)),
     ],

    ],

   ['JA',

    # Specs here are "FIRST-LAST-WEIGHT", all fields in hex.
    ['EUC_JP',
     (map {
       my ($row1, $cell1, $row2, $cell2, $wt) = $hex_fields->($_);
       my $first = $word_94x94->(0x42, $row1, $cell1);
       my $last = $word_94x94->(0x42, $row2, $cell2);

       ($first, $last, $wt);
     } qw(A1A2-A1A2-06 A1A3-A1A3-06 A1A4-A1A4-06 A1A5-A1A5-06 A4A1-A4F3-04 A5A1-A5F6-02)),
     ],

    ['SJIS',
     (map {
       my ($hi1, $lo1, $hi2, $lo2, $wt) = $hex_fields->($_);
       my $first = MB_SJIS_ENC($hi1, $lo1);
       my $last = MB_SJIS_ENC($hi2, $lo2);

       ($first, $last, $wt);
     } qw(8141-8141-06 8142-8142-06 8143-8143-06 8144-8144-06 829F-82F1-04 8340-8396-02)),
     ],

    ['UTF8',
     (map {
       my $ucs = $_;

       ($ucs, $ucs, 6);
     } (0x3001, 0x3002, 0xFF0C, 0xFF0E)),
     0x3041, 0x3093, 4,
     0x30A1, 0x30FE, 2,
     ],

    ],

   ['KR',

    # Specs here are "CODE-WEIGHT", both fields in hex.
    ['EUC_KR',
     (map {
       my ($row, $cell, $wt) = $hex_fields->($_);
       my $wc = $word_94x94->(0x43, $row, $cell);

       ($wc, $wc, $wt);
     } qw(B4D9-06 C0CC-06
	  B4C2-04 C7CF-04
	  B0A1-02 B0ED-02 B1E2-02 B5A5-02 B7CE-02 B8A6-02
	  B8AE-02 BAF1-02 BBE7-02 BCAD-02 BDBA-02 BEC8-02
	  BEEE-02 BFA1-02 C0BA-02 C0BB-02 C0C7-02 C0D6-02
	  C0DA-02 C1F6-02 C6AE-02 C7D1-02)),
     ],

    ['JOHAB',
     (map {
       my ($hi, $lo, $wt) = $hex_fields->($_);
       my $wc = MB_JOHAB_ENC($hi, $lo);

       ($wc, $wc, $wt);
     } qw(9461-06 B7A1-06
	  9365-04 D061-04
	  8861-02 89A1-02 8BA1-02 9541-02 9DA1-02 9F69-02
	  9FA1-02 A7A1-02 AC61-02 ACE1-02 AF61-02 B465-02
	  B4E1-02 B541-02 B765-02 B769-02 B781-02 B7B6-02
	  B861-02 BBA1-02 CB61-02 D065-02)),
     ],

    # [code point, weight]; the trailing comment is the EUC_KR spec code
    # carrying the same weight in the EUC_KR list above.
    ['UTF8',
     (map {
       my ($ucs, $wt) = @$_;

       ($ucs, $ucs, $wt);
     } ([0xAC00, 2],		# B0A1
	[0xACE0, 2],		# B0ED
	[0xAE30, 2],		# B1E2
	[0xB294, 4],		# B4C2
	[0xB2E4, 6],		# B4D9
	[0xB370, 2],		# B5A5
	[0xB85C, 2],		# B7CE
	[0xB97C, 2],		# B8A6
	[0xB9AC, 2],		# B8AE
	[0xBE44, 2],		# BAF1
	[0xC0AC, 2],		# BBE7
	[0xC11C, 2],		# BCAD
	[0xC2A4, 2],		# BDBA
	[0xC548, 2],		# BEC8
	[0xC5B4, 2],		# BEEE
	[0xC5D0, 2],		# BFA1
	[0xC740, 2],		# C0BA
	[0xC744, 2],		# C0BB
	[0xC758, 2],		# C0C7
	[0xC774, 6],		# C0CC
	[0xC788, 2],		# C0D6
	[0xC790, 2],		# C0DA
	[0xC9C0, 2],		# C1F6
	[0xD2B8, 2],		# C6AE
	[0xD558, 4],		# C7CF
	[0xD55C, 2],		# C7D1
       )),
     ],

    ],

   );

# Flatten @mb_lang into @tab, one row per weight range:
#   [first, last, weight, CES id (or ''), language id]
# and write one "#define USE_<LANG>" line per language to C along the way.
my (@tab, $lang, $wcwv);

for $lang (@mb_lang) {
  my ($lang_id, @ces_tables) = @$lang;

  print C "#define USE_$lang_id\n";

  for $wcwv (@ces_tables) {
    my ($ces_id, @ranges) = @$wcwv;

    # Keep the CES id only when its endecoder_spec is an array reference;
    # otherwise the rows carry an empty CES id.
    $ces_id = '' unless (ref(CES->by_id($ces_id)->endecoder_spec) eq 'ARRAY');

    # Take complete (first, last, weight) triples; a short tail is ignored.
    while (@ranges >= 3) {
      push(@tab, [splice(@ranges, 0, 3), $ces_id, $lang_id]);
    }
  }
}

# USE_UCS is written unconditionally, after the per-language USE_* defines.
# It is the last line of the language configuration header, so that handle
# is closed here.  Buffered write errors are reported by close, so its result
# is checked; otherwise a truncated header would be left behind silently.
print C "#define USE_UCS\n";
close(C) || die "close(C) for \"$cf_h\": $!";

# Weight table prologue: pull in both configuration headers, then the
# "%%TYPE"/"%%BEGIN" directives (presumably consumed by the mk_btri table
# generator that the default output name refers to -- confirm).
print W "#include \"$cescf_h\"\n#include \"$cf_h\"\n%%TYPE number\n%%BEGIN\n";

# One "0xFIRST-0xLAST,WEIGHTU" line per row, in ascending order of the
# range's first word code.
foreach my $row (sort {$a->[0] <=> $b->[0]} @tab) {
  my ($first, $last, $weight, $ces_id, $lang_id) = @$row;
  my $c = sprintf('0x%X-0x%X,%uU', $first, $last, $weight);

  # Inner guard on the CES (only when a CES id was kept), outer guard on the
  # language, so the line is compiled in only when both are enabled.
  $c = sprintf("#ifdef USE_%s\n%s\n#endif", $ces_id, $c) if ($ces_id ne '');
  $c = sprintf("#ifdef USE_%s\n%s\n#endif", $lang_id, $c);
  print W $c, "\n";
}

# Same reasoning as for C: surface any deferred write error on the table.
close(W) || die "close(W) for \"$wcw_h\": $!";