Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d4328b40 authored by Adam Tlalka, committed by Linus Torvalds
Browse files

[PATCH] console utf-8 mode fixes



Fix utf-8 mode so that alternate charset modes always work according to the
control sequences interpreted in the do_con_trol function, preserving backward
US-ASCII and VT100 semigraphics compatibility.

Malformed utf-8 sequences are represented as sequences of replacement
glyphs, original codes or '?' as a last resort.

unicode-xterm, gnome-terminal, kconsole and other terminal emulators in
utf-8 mode respect acsc, enacs, rmacs sequences.  I also found that some
important system programs (from the Debian distro) use acsc in utf-8 mode -
dselect, aptitude and w3m, for example.

Signed-off-by: Adam Tlalka <atlka@pg.gda.pl>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 08c67d2a
Loading
Loading
Loading
Loading
+52 −27
Original line number Diff line number Diff line
@@ -63,6 +63,13 @@
 *
 * Removed console_lock, enabled interrupts across all console operations
 * 13 March 2001, Andrew Morton
 *
 * Fixed UTF-8 mode so alternate charset modes always work according
 * to control sequences interpreted in do_con_trol function
 * preserving backward VT100 semigraphics compatibility,
 * malformed UTF sequences represented as sequences of replacement glyphs,
 * original codes or '?' as a last resort if replacement glyph is undefined
 * by Adam Tla/lka <atlka@pg.gda.pl>, Aug 2006
 */

#include <linux/module.h>
@@ -2005,17 +2012,23 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
		/* Do no translation at all in control states */
		if (vc->vc_state != ESnormal) {
			tc = c;
		} else if (vc->vc_utf) {
		} else if (vc->vc_utf && !vc->vc_disp_ctrl) {
		    /* Combine UTF-8 into Unicode */
		    /* Incomplete characters silently ignored */
		    /* Malformed sequences as sequences of replacement glyphs */
rescan_last_byte:
		    if(c > 0x7f) {
			if (vc->vc_utf_count > 0 && (c & 0xc0) == 0x80) {
			if (vc->vc_utf_count) {
			       if ((c & 0xc0) == 0x80) {
				       vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
				vc->vc_utf_count--;
				if (vc->vc_utf_count == 0)
       				       if (--vc->vc_utf_count) {
					       vc->vc_npar++;
				   	       continue;
       				       }
				       tc = c = vc->vc_utf_char;
				else continue;
			       } else
				       goto replacement_glyph;
			} else {
				vc->vc_npar = 0;
				if ((c & 0xe0) == 0xc0) {
				    vc->vc_utf_count = 1;
				    vc->vc_utf_char = (c & 0x1f);
@@ -2032,14 +2045,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
				    vc->vc_utf_count = 5;
				    vc->vc_utf_char = (c & 0x01);
				} else
				    vc->vc_utf_count = 0;
	    			    goto replacement_glyph;
				continue;
			      }
		    } else {
		      if (vc->vc_utf_count)
	  		      goto replacement_glyph;
		      tc = c;
		      vc->vc_utf_count = 0;
		    }
		} else {	/* no utf */
		} else {	/* no utf or alternate charset mode */
		  tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c];
		}

@@ -2054,31 +2068,33 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
                 * direct-to-font zone in UTF-8 mode.
                 */
                ok = tc && (c >= 32 ||
			    (!vc->vc_utf && !(((vc->vc_disp_ctrl ? CTRL_ALWAYS
						: CTRL_ACTION) >> c) & 1)))
			    !(vc->vc_disp_ctrl ? (CTRL_ALWAYS >> c) & 1 :
				  vc->vc_utf || ((CTRL_ACTION >> c) & 1)))
			&& (c != 127 || vc->vc_disp_ctrl)
			&& (c != 128+27);

		if (vc->vc_state == ESnormal && ok) {
			/* Now try to find out how to display it */
			tc = conv_uni_to_pc(vc, tc);
			if (tc & ~charmask) {
				if ( tc == -4 ) {
                                /* If we got -4 (not found) then see if we have
                                   defined a replacement character (U+FFFD) */
replacement_glyph:
                                	tc = conv_uni_to_pc(vc, 0xfffd);

				/* One reason for the -4 can be that we just
				   did a clear_unimap();
				   try at least to show something. */
				if (tc == -4)
				     tc = c;
                        } else if ( tc == -3 ) {
                                /* Bad hash table -- hope for the best */
					if (!(tc & ~charmask))
						goto display_glyph;
                        	} else if ( tc != -3 )
                                	continue; /* nothing to display */
                                /* no hash table or no replacement --
				 * hope for the best */
				if ( c & ~charmask )
					tc = '?';
				else
					tc = c;
			}
			if (tc & ~charmask)
                                continue; /* Conversion failed */

display_glyph:
			if (vc->vc_need_wrap || vc->vc_decim)
				FLUSH
			if (vc->vc_need_wrap) {
@@ -2102,6 +2118,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
				vc->vc_x++;
				draw_to = (vc->vc_pos += 2);
			}
			if (vc->vc_utf_count) {
				if (vc->vc_npar) {
					vc->vc_npar--;
					goto display_glyph;
				}
				vc->vc_utf_count = 0;
				c = orig;
				goto rescan_last_byte;
			}
			continue;
		}
		FLUSH