Add support for multiple charset definitions

vt100 has support for two defined charset, G0 and G1. Each charset can be defined, but in each moment is selected only one of both charset. This is usually used selecting a national charset in G0 and graphic charset in G1, so you can switch between graphic charset and text charset without losing the national charset already defined. st hasn't support for national charsets, because it is an utf8 based terminal emulator, but it has support for graphic charset because it is heavily used, but it only supports G0, without understanding G1 selection sequences, which causes some programs in some moments can print some garbage in the screen. This patch adds a fake support for multiple charset definitions, where we only support graphic charset and us-ascii charset, but we allow more of one charset definition. This patch allow define G0 until G3 charsets, but only accepts select G0 or G1, and it accepts some national charset definitions but all of them are mapped to us-ascii.
author: Roberto E. Vargas Caballero <k0ga@shike2.com> 2013-10-01 21:23:11 +0200
committer: Roberto E. Vargas Caballero <k0ga@shike2.com> 2013-10-02 21:22:15 +0200
commit: 7a4eefe87cb7661c8a77286d05b6c3afb467f806 (patch)
tree: 9ecce302d1acd95238e312a315c211cce5bec4a3 /st.c
parent: c5c2365ab7c7ac2671b6e7d31cc9b0d41636393c (diff)
download: st-7a4eefe87cb7661c8a77286d05b6c3afb467f806.tar.gz
st-7a4eefe87cb7661c8a77286d05b6c3afb467f806.tar.bz2
1 files changed, 55 insertions, 27 deletions
diff --git a/st.c b/st.c
index 84f9abb..77ea0c8 100644
--- a/st.c
+++ b/st.c
@@ -137,6 +137,16 @@ enum term_mode {
 	                  |MODE_MOUSEMANY,
 };
 
+enum charset {
+	CS_GRAPHIC0,
+	CS_GRAPHIC1,
+	CS_UK,
+	CS_USA,
+	CS_MULTI,
+	CS_GER,
+	CS_FIN
+};
+
 enum escape_state {
 	ESC_START      = 1,
 	ESC_CSI        = 2,
@@ -216,6 +226,9 @@ typedef struct {
 	int bot;      /* bottom scroll limit */
 	int mode;     /* terminal mode flags */
 	int esc;      /* escape state flags */
+	char trantbl[4]; /* charset table translation */
+	int charset;  /* current charset */
+	int icharset; /* selected charset for sequence */
 	bool numlock; /* lock numbers in keyboard */
 	bool *tabs;
 } Term;
@@ -367,6 +380,8 @@ static void tsetmode(bool, bool, int *, int);
 static void tfulldirt(void);
 static void techo(char *, int);
 static long tdefcolor(int *, int *, int);
+static void tselcs(void);
+static void tdeftran(char);
 static inline bool match(uint, uint);
 static void ttynew(void);
 static void ttyread(void);
@@ -1369,6 +1384,8 @@ treset(void) {
 	term.top = 0;
 	term.bot = term.row - 1;
 	term.mode = MODE_WRAP;
+	memset(term.trantbl, sizeof(term.trantbl), CS_USA);
+	term.charset = 0;
 
 	tclearregion(0, 0, term.col-1, term.row-1);
 	tmoveto(0, 0);
@@ -2260,6 +2277,33 @@ techo(char *buf, int len) {
 }
 
 void
+tdeftran(char ascii) {
+	char c, (*bp)[2];
+	static char tbl[][2] = {
+		{'0', CS_GRAPHIC0}, {'1', CS_GRAPHIC1}, {'A', CS_UK},
+		{'B', CS_USA},      {'<', CS_MULTI},    {'K', CS_GER},
+		{'5', CS_FIN},      {'C', CS_FIN},
+		{0, 0}
+	};
+
+	for (bp = &tbl[0]; (c = (*bp)[0]) && c != ascii; ++bp)
+		/* nothing */;
+
+	if (c == 0)
+		fprintf(stderr, "esc unhandled charset: ESC ( %c\n", ascii);
+	else
+		term.trantbl[term.icharset] = (*bp)[1];
+}
+
+void
+tselcs(void) {
+	if (term.trantbl[term.charset] == CS_GRAPHIC0)
+		term.c.attr.mode |= ATTR_GFX;
+	else
+		term.c.attr.mode &= ~ATTR_GFX;
+}
+
+void
 tputc(char *c, int len) {
 	uchar ascii = *c;
 	bool control = ascii < '\x20' || ascii == 0177;
@@ -2351,13 +2395,12 @@ tputc(char *c, int len) {
 			term.esc = ESC_START;
 			return;
 		case '\016': /* SO */
+			term.charset = 0;
+			tselcs();
+			return;
 		case '\017': /* SI */
-			/*
-			 * Different charsets are hard to handle. Applications
-			 * should use the right alt charset escapes for the
-			 * only reason they still exist: line drawing. The
-			 * rest is incompatible history st should not support.
-			 */
+			term.charset = 1;
+			tselcs();
 			return;
 		case '\032': /* SUB */
 		case '\030': /* CAN */
@@ -2385,22 +2428,8 @@ tputc(char *c, int len) {
 			if(ascii == '\\')
 				strhandle();
 		} else if(term.esc & ESC_ALTCHARSET) {
-			switch(ascii) {
-			case '0': /* Line drawing set */
-				term.c.attr.mode |= ATTR_GFX;
-				break;
-			case 'B': /* USASCII */
-				term.c.attr.mode &= ~ATTR_GFX;
-				break;
-			case 'A': /* UK (IGNORED) */
-			case '<': /* multinational charset (IGNORED) */
-			case '5': /* Finnish (IGNORED) */
-			case 'C': /* Finnish (IGNORED) */
-			case 'K': /* German (IGNORED) */
-				break;
-			default:
-				fprintf(stderr, "esc unhandled charset: ESC ( %c\n", ascii);
-			}
+			tdeftran(ascii);
+			tselcs();
 			term.esc = 0;
 		} else if(term.esc & ESC_TEST) {
 			if(ascii == '8') { /* DEC screen alignment test. */
@@ -2431,13 +2460,12 @@ tputc(char *c, int len) {
 				term.esc |= ESC_STR;
 				break;
 			case '(': /* set primary charset G0 */
+			case ')': /* set secondary charset G1 */
+			case '*': /* set tertiary charset G2 */
+			case '+': /* set quaternary charset G3 */
+				term.icharset = ascii - '(';
 				term.esc |= ESC_ALTCHARSET;
 				break;
-			case ')': /* set secondary charset G1 (IGNORED) */
-			case '*': /* set tertiary charset G2 (IGNORED) */
-			case '+': /* set quaternary charset G3 (IGNORED) */
-				term.esc = 0;
-				break;
 			case 'D': /* IND -- Linefeed */
 				if(term.c.y == term.bot) {
 					tscrollup(term.top, 1);
author	Roberto E. Vargas Caballero <k0ga@shike2.com>	2013-10-01 21:23:11 +0200
committer	Roberto E. Vargas Caballero <k0ga@shike2.com>	2013-10-02 21:22:15 +0200
commit	7a4eefe87cb7661c8a77286d05b6c3afb467f806 (patch)
tree	9ecce302d1acd95238e312a315c211cce5bec4a3 /st.c
parent	c5c2365ab7c7ac2671b6e7d31cc9b0d41636393c (diff)
download	st-7a4eefe87cb7661c8a77286d05b6c3afb467f806.tar.gz st-7a4eefe87cb7661c8a77286d05b6c3afb467f806.tar.bz2