tools/tools/vt/keymaps/convert-keymap.pl

   1 #!/usr/bin/perl
   2 # $FreeBSD$
   3
   4 use Text::Iconv;
   5 use Encode;
   6 use strict;
   7 use utf8;
   8
   9 # command line parsing
  10 die "Usage: $0 filename.kbd charset [EURO|YEN]\n"
  11     unless ($ARGV[1]);
  12
  13 my $inputfile = shift;                                  # first command argument
  14 my $converter = Text::Iconv->new(shift, "UTF-8");       # second argument
  15 my $use_euro;
  16 my $use_yen;
  17 my $current_char;
  18 my $current_scancode;
  19
  20 while (my $arg = shift) {
  21     $use_euro = 1, next
  22         if $arg eq "EURO";
  23     $use_yen = 1, next
  24         if $arg eq "YEN";
  25     die "Unknown encoding option '$arg'\n";
  26 }
  27
  28 # converter functions
  29 sub local_to_UCS_string
  30 {
  31     my ($string) = @_;
  32
  33     return $converter->convert($string);
  34 }
  35
  36 sub prettyprint_token
  37 {
  38     my ($ucs_char) = @_;
  39
  40     return "'" . chr($ucs_char) . "'"
  41         if 32 <= $ucs_char and $ucs_char <= 126; # print as ASCII if possible
  42 #    return sprintf "%d", $ucs_char; # <---- temporary decimal
  43     return sprintf "0x%02x", $ucs_char
  44         if $ucs_char <= 255;        # print as hex number, else
  45     return sprintf "0x%04x", $ucs_char;
  46 }
  47
  48 sub local_to_UCS_code
  49 {
  50     my ($char) = @_;
  51
  52     my $ucs_char = ord(Encode::decode("UTF-8", local_to_UCS_string($char)));
  53
  54     $current_char = lc(chr($ucs_char))
  55         if $current_char eq "";
  56
  57     $ucs_char = 0x20ac  # replace with Euro character
  58         if $ucs_char == 0xa4 and $use_euro and $current_char eq "e";
  59
  60     $ucs_char = 0xa5    # replace with Jap. Yen character on PC kbd
  61         if $ucs_char == ord('\\') and $use_yen and $current_scancode == 125;
  62
  63 #    $ucs_char = 0xa5   # replace with Jap. Yen character on PC98x1 kbd
  64 #       if $ucs_char == ord('\\') and $use_yen and $current_scancode == 13;
  65
  66     return prettyprint_token($ucs_char);
  67 }
  68
  69 sub malformed_to_UCS_code
  70 {
  71     my ($char) = @_;
  72
  73     return prettyprint_token(ord(Encode::decode("UTF-8", $char)));
  74 }
  75
  76 sub convert_token
  77 {
  78     my ($C) = @_;
  79
  80     return $1
  81         if $C =~ m/^([a-z][a-z0-9]*)$/;         # key token
  82     return local_to_UCS_code(chr($1))
  83         if $C =~ m/^(\d+)$/;                    # decimal number
  84     return local_to_UCS_code(chr(hex($1)))
  85         if $C =~ m/^0x([0-9a-f]+)$/i;           # hex number
  86     return local_to_UCS_code(chr(ord($1)))
  87         if $C =~ m/^'(.)'$/;                    # character
  88     return malformed_to_UCS_code($1)
  89         if $C =~ m/^'(.+)'$/;                   # character
  90     return "<?$C?>";                            # uncovered case
  91 }
  92
  93 sub tokenize { # split on white space and parentheses (but not within token)
  94     my ($line) = @_;
  95
  96     $line =~ s/'\('/ _lpar_ /g; # prevent splitting of '('
  97     $line =~ s/'\)'/ _rpar_ /g; # prevent splitting of ')'
  98     $line =~ s/'''/'_squote_'/g; # remove quoted single quotes from matches below
  99     $line =~ s/([()])/ $1 /g; # insert blanks around remaining parentheses
 100     my $matches;
 101     do {
 102         $matches = ($line =~ s/^([^']*)'([^']+)'/$1_squoteL_$2_squoteR_/g);
 103     } while $matches;
 104     $line =~ s/_squoteL_ _squoteR_/ _spc_ /g; # prevent splitting of ' '
 105     my @KEYTOKEN = split (" ", $line);
 106     grep(s/_squote[LR]?_/'/g, @KEYTOKEN);
 107     grep(s/_spc_/' '/, @KEYTOKEN);
 108     grep(s/_lpar_/'('/, @KEYTOKEN);
 109     grep(s/_rpar_/')'/, @KEYTOKEN);
 110     return @KEYTOKEN;
 111 }
 112
 113 # main program
 114 open FH, "<$inputfile";
 115 while (<FH>) {
 116     if (m/^#/) {
 117         print local_to_UCS_string($_);
 118     } elsif (m/^\s*$/) {
 119         print "\n";
 120     } else {
 121         my @KEYTOKEN = tokenize($_);
 122         my $at_bol = 1;
 123         my $C;
 124         foreach $C (@KEYTOKEN) {
 125             if ($at_bol) {
 126                 $current_char = "";
 127                 $current_scancode = -1;
 128                 if ($C =~ m/^\s*\d/) { # line begins with key code number
 129                     $current_scancode = $C;
 130                     printf "  %03d   ", $C;
 131                 } elsif ($C =~ m/^[a-z]/) { # line begins with accent name or paren
 132                     printf "  %-4s ", $C; # accent name starts accent definition
 133                 } elsif ($C eq "(") {
 134                     printf "%17s", "( "; # paren continues accent definition
 135                 } else {
 136                     print "Unknown input line format: $_";
 137                 }
 138                 $at_bol = 0;
 139             } else {
 140                 if ($C =~ m/^([BCNO])$/) {
 141                     print " $1"; # special case: effect of Caps Lock/Num Lock
 142                 } elsif ($C eq "(") {
 143                     $current_char = "";
 144                     print " ( ";
 145                 } elsif ($C eq ")") {
 146                     print " )";
 147                 } else {
 148                     printf "%-6s ", convert_token($C);
 149                 }
 150             }
 151         }
 152         print "\n";
 153     }
 154 }
 155 close FH;