diff options
author | Adam Tlalka <atlka@pg.gda.pl> | 2006-09-29 01:59:53 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-09-29 09:18:11 -0700 |
commit | d4328b40af16bae62ff8f854060d33daad237093 (patch) | |
tree | 7fdb3bb354da8a04efc19ec0eb9cdabe7adef98e /drivers | |
parent | 08c67d2a5d1c97b7997dc9589f702d875c63de07 (diff) | |
download | lwn-d4328b40af16bae62ff8f854060d33daad237093.tar.gz lwn-d4328b40af16bae62ff8f854060d33daad237093.zip |
[PATCH] console utf-8 mode fixes
Fix utf-8 mode so alternate charset modes always work according to control
sequences interpreted in the do_con_trol function, preserving backward US-ASCII
and VT100 semigraphics compatibility.
Malformed utf-8 sequences are represented as sequences of replacement
glyphs, original codes or '?' as a last resort.
unicode-xterm, gnome-terminal, kconsole and other terminal emulators in
utf-8 mode respect acsc, enacs, rmacs sequences. Also I found that some
important system programs (from Debian distro) use acsc in utf-8 mode -
dselect, aptitude, w3m for example.
Signed-off-by: Adam Tlalka <atlka@pg.gda.pl>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/char/vt.c | 79 |
1 files changed, 52 insertions, 27 deletions
diff --git a/drivers/char/vt.c b/drivers/char/vt.c index d7d880f8147b..0fca83ededff 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -63,6 +63,13 @@ * * Removed console_lock, enabled interrupts across all console operations * 13 March 2001, Andrew Morton + * + * Fixed UTF-8 mode so alternate charset modes always work according + * to control sequences interpreted in do_con_trol function + * preserving backward VT100 semigraphics compatibility, + * malformed UTF sequences represented as sequences of replacement glyphs, + * original codes or '?' as a last resort if replacement glyph is undefined + * by Adam Tla/lka <atlka@pg.gda.pl>, Aug 2006 */ #include <linux/module.h> @@ -2005,17 +2012,23 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co /* Do no translation at all in control states */ if (vc->vc_state != ESnormal) { tc = c; - } else if (vc->vc_utf) { + } else if (vc->vc_utf && !vc->vc_disp_ctrl) { /* Combine UTF-8 into Unicode */ - /* Incomplete characters silently ignored */ + /* Malformed sequences as sequences of replacement glyphs */ +rescan_last_byte: if(c > 0x7f) { - if (vc->vc_utf_count > 0 && (c & 0xc0) == 0x80) { - vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f); - vc->vc_utf_count--; - if (vc->vc_utf_count == 0) - tc = c = vc->vc_utf_char; - else continue; + if (vc->vc_utf_count) { + if ((c & 0xc0) == 0x80) { + vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f); + if (--vc->vc_utf_count) { + vc->vc_npar++; + continue; + } + tc = c = vc->vc_utf_char; + } else + goto replacement_glyph; } else { + vc->vc_npar = 0; if ((c & 0xe0) == 0xc0) { vc->vc_utf_count = 1; vc->vc_utf_char = (c & 0x1f); @@ -2032,14 +2045,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co vc->vc_utf_count = 5; vc->vc_utf_char = (c & 0x01); } else - vc->vc_utf_count = 0; + goto replacement_glyph; continue; } } else { + if (vc->vc_utf_count) + goto replacement_glyph; tc = c; - vc->vc_utf_count = 0; } - 
} else { /* no utf */ + } else { /* no utf or alternate charset mode */ tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c]; } @@ -2054,31 +2068,33 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co * direct-to-font zone in UTF-8 mode. */ ok = tc && (c >= 32 || - (!vc->vc_utf && !(((vc->vc_disp_ctrl ? CTRL_ALWAYS - : CTRL_ACTION) >> c) & 1))) + !(vc->vc_disp_ctrl ? (CTRL_ALWAYS >> c) & 1 : + vc->vc_utf || ((CTRL_ACTION >> c) & 1))) && (c != 127 || vc->vc_disp_ctrl) && (c != 128+27); if (vc->vc_state == ESnormal && ok) { /* Now try to find out how to display it */ tc = conv_uni_to_pc(vc, tc); - if ( tc == -4 ) { + if (tc & ~charmask) { + if ( tc == -4 ) { /* If we got -4 (not found) then see if we have defined a replacement character (U+FFFD) */ - tc = conv_uni_to_pc(vc, 0xfffd); - - /* One reason for the -4 can be that we just - did a clear_unimap(); - try at least to show something. */ - if (tc == -4) - tc = c; - } else if ( tc == -3 ) { - /* Bad hash table -- hope for the best */ - tc = c; - } - if (tc & ~charmask) - continue; /* Conversion failed */ +replacement_glyph: + tc = conv_uni_to_pc(vc, 0xfffd); + if (!(tc & ~charmask)) + goto display_glyph; + } else if ( tc != -3 ) + continue; /* nothing to display */ + /* no hash table or no replacement -- + * hope for the best */ + if ( c & ~charmask ) + tc = '?'; + else + tc = c; + } +display_glyph: if (vc->vc_need_wrap || vc->vc_decim) FLUSH if (vc->vc_need_wrap) { @@ -2102,6 +2118,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co vc->vc_x++; draw_to = (vc->vc_pos += 2); } + if (vc->vc_utf_count) { + if (vc->vc_npar) { + vc->vc_npar--; + goto display_glyph; + } + vc->vc_utf_count = 0; + c = orig; + goto rescan_last_byte; + } continue; } FLUSH |