10 die "Usage: $0 filename.kbd charset [EURO|YEN]\n"
13 my $inputfile = shift; # first command argument
14 my $converter = Text::Iconv->new(shift, "UTF-8"); # second argument
20 while (my $arg = shift) {
25 die "Unknown encoding option '$arg'\n";
29 sub local_to_UCS_string
33 return $converter->convert($string);
40 return "'" . chr($ucs_char) . "'"
41 if 32 <= $ucs_char and $ucs_char <= 126; # print as ASCII if possible
42 # return sprintf "%d", $ucs_char; # <---- temporary decimal
43 return sprintf "0x%02x", $ucs_char
44 if $ucs_char <= 255; # print as hex number, else
45 return sprintf "0x%04x", $ucs_char;
52 my $ucs_char = ord(Encode::decode("UTF-8", local_to_UCS_string($char)));
54 $current_char = lc(chr($ucs_char))
55 if $current_char eq "";
57 $ucs_char = 0x20ac # replace with Euro character
58 if $ucs_char == 0xa4 and $use_euro and $current_char eq "e";
60 $ucs_char = 0xa5 # replace with Jap. Yen character on PC kbd
61 if $ucs_char == ord('\\') and $use_yen and $current_scancode == 125;
63 # $ucs_char = 0xa5 # replace with Jap. Yen character on PC98x1 kbd
64 # if $ucs_char == ord('\\') and $use_yen and $current_scancode == 13;
66 return prettyprint_token($ucs_char);
69 sub malformed_to_UCS_code
73 return prettyprint_token(ord(Encode::decode("UTF-8", $char)));
81 if $C =~ m/^([a-z][a-z0-9]*)$/; # key token
82 return local_to_UCS_code(chr($1))
83 if $C =~ m/^(\d+)$/; # decimal number
84 return local_to_UCS_code(chr(hex($1)))
85 if $C =~ m/^0x([0-9a-f]+)$/i; # hex number
86 return local_to_UCS_code(chr(ord($1)))
87 if $C =~ m/^'(.)'$/; # character
88 return malformed_to_UCS_code($1)
89 if $C =~ m/^'(.+)'$/; # character
90 return "<?$C?>"; # uncovered case
93 sub tokenize { # split on white space and parentheses (but not within token)
96 $line =~ s/'\('/ _lpar_ /g; # prevent splitting of '('
97 $line =~ s/'\)'/ _rpar_ /g; # prevent splitting of ')'
98 $line =~ s/'''/'_squote_'/g; # remove quoted single quotes from matches below
99 $line =~ s/([()])/ $1 /g; # insert blanks around remaining parentheses
102 $matches = ($line =~ s/^([^']*)'([^']+)'/$1_squoteL_$2_squoteR_/g);
104 $line =~ s/_squoteL_ _squoteR_/ _spc_ /g; # prevent splitting of ' '
105 my @KEYTOKEN = split (" ", $line);
106 grep(s/_squote[LR]?_/'/g, @KEYTOKEN);
107 grep(s/_spc_/' '/, @KEYTOKEN);
108 grep(s/_lpar_/'('/, @KEYTOKEN);
109 grep(s/_rpar_/')'/, @KEYTOKEN);
114 open FH, "<$inputfile";
117 print local_to_UCS_string($_);
121 my @KEYTOKEN = tokenize($_);
124 foreach $C (@KEYTOKEN) {
127 $current_scancode = -1;
128 if ($C =~ m/^\s*\d/) { # line begins with key code number
129 $current_scancode = $C;
131 } elsif ($C =~ m/^[a-z]/) { # line begins with accent name or paren
132 printf " %-4s ", $C; # accent name starts accent definition
133 } elsif ($C eq "(") {
134 printf "%17s", "( "; # paren continues accent definition
136 print "Unknown input line format: $_";
140 if ($C =~ m/^([BCNO])$/) {
141 print " $1"; # special case: effect of Caps Lock/Num Lock
142 } elsif ($C eq "(") {
145 } elsif ($C eq ")") {
148 printf "%-6s ", convert_token($C);