/*
 * Functions to define the character set
 * and do things specific to the character set.
 */

#include "less.h"
#if SETLOCALE
#include <locale.h>
#endif

/*
 * Flags controlling how KANJI codes are treated; shared with
 * line.c and output.c.
 * All of them start at 0 and are set by icharset() according to
 * the name of the selected character set.
 */
public int treat_jis_code = 0;	/* Treat JIS as one of the KANJI codes */
public int treat_ujis_code = 0;	/* Treat UJIS (EUC) as one of the KANJI codes */
public int treat_sjis_code = 0;	/* Treat SJIS as one of the KANJI codes */
public int treat_all_kanji_code = 0;	/* Treat all KANJI codes without KANA */
public int output_jis = 0;	/* Use JIS as the output KANJI code */

/*
 * Predefined character sets,
 * selected by the LESSCHARSET environment variable
 * (or indirectly through the charlocales table).
 *
 * Each entry pairs a charset name with a description string in the
 * format parsed by ichardef(): '.' marks a normal character, 'c' a
 * control character and 'b' a binary character; a decimal number is
 * a repeat count for the letter that follows it, and the last letter
 * covers all remaining character codes up to 255.
 * The table is terminated by an entry whose name is NULL.
 */
struct charset {
	char *name;	/* charset name, compared by icharset() */
	char *desc;	/* description string handed to ichardef() */
} charsets[] = {
	{ "ascii",	"8bcccbcc18b95.b"	},
	{ "latin1",	"8bcccbcc18b95.33b."	},
	{ "jis",		"8bcccb4c11bc4b95.b"		},
	{ "ujis-jis",		"8bcccb4c11bc4b95.15b2.17b94.b"	},
	{ "euc-jis",		"8bcccb4c11bc4b95.15b2.17b94.b"	},
	{ "sjis-jis",		"8bcccb4c11bc4b95.15b2.17b94.b"	},
	{ "ujis",		"8bcccbcc18b95.15b2.17b94.b"	},
	{ "euc",		"8bcccbcc18b95.15b2.17b94.b"	},
	{ "jis-ujis",		"8bcccb4c11bc4b95.15b2.17b94.b"	},
	{ "jis-euc",		"8bcccb4c11bc4b95.15b2.17b94.b"	},
	{ "sjis",		"8bcccbcc18b95.b125.3b" 	},
	{ "jis-sjis",		"8bcccb4c11bc4b95.b125.3b"	},
	{ "japanese-jis",	"8bcccb4c11bc4b95.b127.b"	},
	{ "japanese-ujis",	"8bcccb4c11bc4b95.b127.b"	},
	{ "japanese-euc",	"8bcccb4c11bc4b95.b127.b"	},
	{ "japanese-sjis",	"8bcccb4c11bc4b95.b127.b"	},
	{ NULL }
};

/*
 * Predefined locale names,
 * as obtained from setlocale() or from the LC_CTYPE / LANG
 * environment variables (see init_charset()).
 *
 * Each entry maps a locale name to the name of one of the entries
 * in the charsets table; icharlocale() does the lookup.
 * Some names are only known on particular platforms and are
 * compiled in conditionally.
 * The table is terminated by an entry whose name is NULL.
 */
struct charlocale {
	char *name;	/* locale name, compared exactly (case-sensitive) */
	char *charset;	/* charset name passed on to icharset() */
} charlocales[] = {
	{ "C",			"ascii"		},
	{ "ja_JP.JIS",		"japanese-jis"	},
	{ "ja_JP.jis7",		"japanese-jis"	},
	{ "ja_JP.EUC",		"japanese-ujis"	},
	{ "ja_JP.ujis",		"japanese-ujis"	},
	{ "ja_JP.SJIS",		"japanese-sjis"	},
	{ "ja_JP.mscode",	"japanese-sjis"	},
	{ "japan",		"japanese-ujis"	},
	{ "Japan",		"japanese-ujis"	},
#if defined(hpux)
	/* On HP-UX the plain "japanese" locale is mapped to SJIS. */
	{ "japanese",		"japanese-sjis"	},
	{ "japanese.euc",	"japanese-ujis"	},
#else
	{ "japanese",		"japanese-ujis"	},
#endif
	{ "Japanese",		"japanese-ujis"	},
#if defined(_AIX)	/* IBM RS/6000 AIX 3.2 */
	{ "Ja_JP",		"japanese-sjis"	},
	{ "ja_JP",		"japanese-ujis"	},
	{ "En_JP",		"ascii"	},
	{ "En_US",		"ascii" },
	{ "Jp_JP",		"japanese-sjis"	},	/* AIX 3.1 */
# if 0
	/* Disabled entries, kept for reference only. */
	{ "Jp_JP.pc301",	"japanese-sjis" },
	{ "Jp_JP.pc932",	"japanese-sjis" },
	{ "ibmsbcs",		"ascii" },
	{ "En_US.ascii",	"ascii" },
	{ "En_US.pc850",	"ascii" },
	{ "En_US.pc932",	"ascii" },
	{ "En_JP",		"ascii"	},
	{ "En_JP.pc932",	"ascii" },
# endif
#endif /* _AIX */
	{ NULL }
};

/*
 * Bits stored in chardef[] for each character code.
 * ichardef() stores both bits for a 'b' (binary) character and
 * only IS_CONTROL_CHAR for a 'c' (control) character.
 */
#define	IS_BINARY_CHAR	01
#define	IS_CONTROL_CHAR	02

/* Per-character flags, indexed by character code; filled in by ichardef(). */
static char chardef[256];
/*
 * sprintf format used by prchar() for a control character that has
 * no printable "^X" form; may be replaced by LESSBINFMT.
 */
static char *binfmt = "\\%o";
/*
 * Display attribute chosen through the "*d", "*k" or "*u" prefix of
 * LESSBINFMT.  NOTE(review): presumably applied when such characters
 * are displayed; its use is outside this file.
 */
public int binattr = BLINK;

/* Old-style declaration; no prototype is given for getenv here. */
extern char *getenv();

/*
 * Define a charset, given a description string.
 * The string consists of up to 256 letters,
 * one for each character code in the charset.
 * If the string is shorter than 256 letters, missing letters
 * are taken to be identical to the last one.
 * A decimal number followed by a letter is taken to be a
 * repetition of the letter (a count of 0 is treated as 1).
 * A trailing number with no letter after it is ignored.
 *
 * Each letter is one of:
 *	. normal character
 *	b binary character
 *	c control character
 *
 * A description without any letter defines every character as normal.
 * Any other letter, or a description covering more than 256
 * characters, reports an error and quits.
 */
	static void
ichardef(s)
	char *s;
{
	register char *cp;
	register int n;
	register char v;

	n = 0;
	/*
	 * Start from "normal" so that the final fill below never
	 * reads an indeterminate value when the description holds
	 * no letter at all (e.g. only digits).
	 */
	v = 0;
	cp = chardef;
	while (*s != '\0')
	{
		switch (*s++)
		{
		case '.':
			v = 0;
			break;
		case 'c':
			v = IS_CONTROL_CHAR;
			break;
		case 'b':
			v = IS_BINARY_CHAR|IS_CONTROL_CHAR;
			break;

		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			n = (10 * n) + (s[-1] - '0');
			/*
			 * Any count above the table size is equally
			 * invalid; clamp it so that a long run of digits
			 * cannot overflow the int.
			 */
			if (n > (int) sizeof(chardef))
				n = (int) sizeof(chardef) + 1;
			continue;

		default:
			error("invalid chardef", NULL_PARG);
			quit(1);
			/*NOTREACHED*/
		}

		/* Store the letter once, or n times if a count preceded it. */
		do
		{
			if (cp >= chardef + sizeof(chardef))
			{
				error("chardef longer than 256", NULL_PARG);
				quit(1);
				/*NOTREACHED*/
			}
			*cp++ = v;
		} while (--n > 0);
		n = 0;
	}

	/* Remaining characters take the value of the last letter seen. */
	while (cp < chardef + sizeof(chardef))
		*cp++ = v;
}

/*
 * Does "name" end with "suffix"?
 * Names shorter than the suffix never match.
 */
	static int
name_has_suffix(name, suffix)
	char *name;
	char *suffix;
{
	register int nlen;
	register int slen;

	nlen = strlen(name);
	slen = strlen(suffix);
	if (nlen < slen)
		return (0);
	return (strcmp(name + nlen - slen, suffix) == 0);
}

/*
 * Define a charset, given a charset name.
 * The valid charset names are listed in the "charsets" array.
 *
 * Besides loading the character definition, the KANJI flags are
 * set from the name: the leading part ("jis", "ujis", "euc", "sjis"
 * or "japanese") and the trailing part (the whole name, or a
 * "-jis", "-ujis", "-euc" or "-sjis" suffix) each turn on the
 * matching treat_*_code flag; a name that is "jis" or ends in
 * "-jis" additionally turns on output_jis.
 *
 * Returns 0 if name is NULL or empty, 1 if the charset was defined.
 * An unknown name reports an error and quits.
 */
	static int
icharset(name)
	register char *name;
{
	register struct charset *p;

	if (name == NULL || *name == '\0')
		return (0);

	for (p = charsets;  p->name != NULL;  p++)
	{
		if (strcmp(name, p->name) != 0)
			continue;

		/*
		 * Leading part of the name.
		 * "euc" is compared over 3 characters so that
		 * "euc-jis" is recognized as well as plain "euc".
		 */
		if (strncmp(name, "jis", 3) == 0)
			treat_jis_code = 1;
		else if (strncmp(name, "ujis", 4) == 0)
			treat_ujis_code = 1;
		else if (strncmp(name, "euc", 3) == 0)
			treat_ujis_code = 1;
		else if (strncmp(name, "sjis", 4) == 0)
			treat_sjis_code = 1;
		else if (strncmp(name, "japanese", 8) == 0)
			treat_all_kanji_code = 1;

		/*
		 * Trailing part of the name: either the whole name
		 * or a "-xxx" suffix.  The suffix test is length-checked
		 * so that short names never look before the string.
		 */
		if (strcmp(name, "jis") == 0 ||
		    name_has_suffix(name, "-jis"))
		{
			treat_jis_code = 1;
			output_jis = 1;
		} else if (strcmp(name, "ujis") == 0 ||
			   name_has_suffix(name, "-ujis"))
		{
			treat_ujis_code = 1;
		} else if (strcmp(name, "euc") == 0 ||
			   name_has_suffix(name, "-euc"))
		{
			treat_ujis_code = 1;
		} else if (strcmp(name, "sjis") == 0 ||
			   name_has_suffix(name, "-sjis"))
		{
			treat_sjis_code = 1;
		}
		ichardef(p->desc);
		return (1);
	}

	error("invalid charset name", NULL_PARG);
	quit(1);
	/*NOTREACHED*/
	return (0);
}

/*
 * Select a charset from the name of a locale.
 * The locale names that are understood are those in the
 * "charlocales" array; the comparison is exact.
 *
 * Returns 1 when a matching entry was found and its charset was
 * defined.  Returns 0 when name is NULL or empty (silently), or
 * when no entry matches (after telling the user that the default
 * charset will be used).
 */
	static int
icharlocale(name)
	register char *name;
{
	register struct charlocale *entry;
	PARG parg;

	if (name == NULL || *name == '\0')
		return (0);

	entry = charlocales;
	while (entry->name != NULL)
	{
		if (strcmp(name, entry->name) == 0)
		{
			if (icharset(entry->charset))
				return (1);
		}
		entry++;
	}

	/* Unknown locale: not fatal, the caller falls back to a default. */
	parg.p_string = name;
	error("Sorry, I don't know the language '%s', use default char set.",
	      &parg);
	return (0);
}

/*
 * Initialize charset data structures.
 */
	public void
init_charset()
{
	register char *s;

	/*
	 * Try environment variable LESSCHARSET.
	 * If LESSCHARSET is not set, try LESSCHARDEF.
	 * If LESSCHARDEF is not set, default to "ascii" charset.
	 */
	s = getenv("LESSCHARSET");
	if (icharset(s))
		return;

	s = getenv("LESSCHARDEF");
	if (s != NULL && *s != '\0')
	{
		ichardef(s);
		return;
	}

#if SETLOCALE
	s = setlocale(LC_CTYPE, "");
# if defined(hpux)||defined(H3050R)||defined(H3050)	/* HP-UX 8 */
	if (s != NULL) {
		extern char *strchr(), *strrchr();
		/* treating "/:japanese;/" */
		/* fprintf(stderr, "[%s]\n", ((s) ? s : "(NULL)")); */
		s = strchr(s, (int)':');
		s++;
		*(strrchr(s, (int)';')) = '\0';
	}
# endif
#else
	s = getenv("LC_CTYPE");
	if (s == NULL)
		s = getenv("LANG");
#endif
	if (icharlocale(s))
		return;

	(void) icharset(DEFCHARSET);

	s = getenv("LESSBINFMT");
	if (s != NULL && *s != '\0')
	{
		if (*s == '*')
		{
			switch (s[1])
			{
			case 'd':  binattr = BOLD;      break;
			case 'k':  binattr = BLINK;     break;
			case 'u':  binattr = UNDERLINE; break;
			default:   binattr = NORMAL;    break;
			}
			s += 2;
		}
		if (*s != '\0')
			binfmt = s;
	}
}

/*
 * Is a given character a "binary" character?
 * Only the low 8 bits of c are used, so a sign-extended char
 * (negative value) still indexes inside the chardef table.
 * Returns non-zero if the character is binary, 0 otherwise.
 */
	public int
binary_char(c)
	int c;
{
	c &= 0377;
	return (chardef[c] & IS_BINARY_CHAR);
}

/*
 * Is a given character a "control" character?
 * Only the low 8 bits of c are used, so a sign-extended char
 * (negative value) still indexes inside the chardef table.
 * Returns non-zero if the character is a control character
 * (binary characters count as control characters too), 0 otherwise.
 */
	public int
control_char(c)
	int c;
{
	c &= 0377;
	return (chardef[c] & IS_CONTROL_CHAR);
}

/*
 * Return the printable form of a character.
 * For example, in the "ascii" charset '\3' is printed as "^C".
 *
 * A character that is not a control character is returned as is;
 * a control character whose code with bit 0100 flipped is not a
 * control character is shown in "^X" form; anything else is
 * formatted with binfmt.
 *
 * The result is in a static buffer that is overwritten by the
 * next call.
 */
	public char *
prchar(c)
	int c;
{
	static char buf[8];

	/*
	 * Use only the low 8 bits: a sign-extended char would index
	 * outside the chardef table, and would be printed by "%o"
	 * as 11 digits, which does not fit in buf.
	 */
	c &= 0377;
	if (!control_char(c))
		sprintf(buf, "%c", c);
	else if (!control_char(c ^ 0100))
		sprintf(buf, "^%c", c ^ 0100);
	else
		/*
		 * NOTE(review): binfmt may come from LESSBINFMT and
		 * nothing here bounds what it writes; a format that
		 * expands to more than 7 characters overflows buf.
		 */
		sprintf(buf, binfmt, c);
	return (buf);
}
