#!/usr/bin/perl # $FreeBSD$ use Text::Iconv; use Encode; use strict; use utf8; # command line parsing die "Usage: $0 filename.kbd charset [EURO|YEN]\n" unless ($ARGV[1]); my $inputfile = shift; # first command argument my $converter = Text::Iconv->new(shift, "UTF-8"); # second argument my $use_euro; my $use_yen; my $current_char; my $current_scancode; while (my $arg = shift) { $use_euro = 1, next if $arg eq "EURO"; $use_yen = 1, next if $arg eq "YEN"; die "Unknown encoding option '$arg'\n"; } # converter functions sub local_to_UCS_string { my ($string) = @_; return $converter->convert($string); } sub prettyprint_token { my ($ucs_char) = @_; return "'" . chr($ucs_char) . "'" if 32 <= $ucs_char and $ucs_char <= 126; # print as ASCII if possible # return sprintf "%d", $ucs_char; # <---- temporary decimal return sprintf "0x%02x", $ucs_char if $ucs_char <= 255; # print as hex number, else return sprintf "0x%04x", $ucs_char; } sub local_to_UCS_code { my ($char) = @_; my $ucs_char = ord(Encode::decode("UTF-8", local_to_UCS_string($char))); $current_char = lc(chr($ucs_char)) if $current_char eq ""; $ucs_char = 0x20ac # replace with Euro character if $ucs_char == 0xa4 and $use_euro and $current_char eq "e"; $ucs_char = 0xa5 # replace with Jap. Yen character on PC kbd if $ucs_char == ord('\\') and $use_yen and $current_scancode == 125; # $ucs_char = 0xa5 # replace with Jap. Yen character on PC98x1 kbd # if $ucs_char == ord('\\') and $use_yen and $current_scancode == 13; return prettyprint_token($ucs_char); } sub malformed_to_UCS_code { my ($char) = @_; return prettyprint_token(ord(Encode::decode("UTF-8", $char))); } sub convert_token { my ($C) = @_; return $1 if $C =~ m/^([a-z][a-z0-9]*)$/; # key token return local_to_UCS_code(chr($1)) if $C =~ m/^(\d+)$/; # decimal number return local_to_UCS_code(chr(hex($1))) if $C =~ m/^0x([0-9a-f]+)$/i; # hex number return local_to_UCS_code(chr(ord($1))) if $C =~ m/^'(.)'$/; # character return malformed_to_UCS_code($1) if $C =~ m/^'(.+)'$/; # character return ""; # uncovered case } sub tokenize { # split on white space and parentheses (but not within token) my ($line) = @_; $line =~ s/'$'/ _lpar_ /g; # prevent splitting of '(' $line =~ s/'$'/ _rpar_ /g; # prevent splitting of ')' $line =~ s/'''/'_squote_'/g; # remove quoted single quotes from matches below $line =~ s/([()])/ $1 /g; # insert blanks around remaining parentheses my $matches; do { $matches = ($line =~ s/^([^']*)'([^']+)'/$1_squoteL_$2_squoteR_/g); } while $matches; $line =~ s/_squoteL_ _squoteR_/ _spc_ /g; # prevent splitting of ' ' my @KEYTOKEN = split (" ", $line); grep(s/_squote[LR]?_/'/g, @KEYTOKEN); grep(s/_spc_/' '/, @KEYTOKEN); grep(s/_lpar_/'('/, @KEYTOKEN); grep(s/_rpar_/')'/, @KEYTOKEN); return @KEYTOKEN; } # main program open FH, "<$inputfile"; while () { if (m/^#/) { print local_to_UCS_string($_); } elsif (m/^\s*$/) { print "\n"; } else { my @KEYTOKEN = tokenize($_); my $at_bol = 1; my $C; foreach $C (@KEYTOKEN) { if ($at_bol) { $current_char = ""; $current_scancode = -1; if ($C =~ m/^\s*\d/) { # line begins with key code number $current_scancode = $C; printf " %03d ", $C; } elsif ($C =~ m/^[a-z]/) { # line begins with accent name or paren printf " %-4s ", $C; # accent name starts accent definition } elsif ($C eq "(") { printf "%17s", "( "; # paren continues accent definition } else { print "Unknown input line format: $_"; } $at_bol = 0; } else { if ($C =~ m/^([BCNO])$/) { print " $1"; # special case: effect of Caps Lock/Num Lock } elsif ($C eq "(") { $current_char = ""; print " ( "; } elsif ($C eq ")") { print " )"; } else { printf "%-6s ", convert_token($C); } } } print "\n"; } } close FH;