426 lines
11 KiB
C
426 lines
11 KiB
C
/*
Copyright (c) 2002 by Tomohiro KUBOTA

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
|
|
/* $XFree86: xc/programs/luit/other.c,v 1.1 2002/10/17 01:06:09 dawes Exp $ */
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <X11/fonts/fontenc.h>
|
|
#include "other.h"
|
|
#include "charset.h"
|
|
|
|
/* Fallback definition in case none of the included headers provides NULL. */
#ifndef NULL
#define NULL 0
#endif

/* ISO 10646 code point of the euro sign; the GBK and Big5-HKSCS mappers in
   this file translate byte value 128 to and from it. */
#define EURO_10646 0x20AC
|
|
|
|
int
|
|
init_gbk(OtherStatePtr s)
|
|
{
|
|
s->gbk.mapping =
|
|
FontEncMapFind("gbk-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
|
|
if(!s->gbk.mapping) return 0;
|
|
|
|
s->gbk.reverse = FontMapReverse(s->gbk.mapping);
|
|
if(!s->gbk.reverse) return 0;
|
|
|
|
s->gbk.buf = -1;
|
|
return 1;
|
|
}
|
|
|
|
unsigned int
|
|
mapping_gbk(unsigned int n, OtherStatePtr s)
|
|
{
|
|
unsigned int r;
|
|
if(n < 128) return n;
|
|
if(n == 128) return EURO_10646;
|
|
r = FontEncRecode(n, s->gbk.mapping);
|
|
return r;
|
|
}
|
|
|
|
unsigned int
|
|
reverse_gbk(unsigned int n, OtherStatePtr s)
|
|
{
|
|
if(n < 128) return n;
|
|
if(n == EURO_10646) return 128;
|
|
return s->gbk.reverse->reverse(n, s->gbk.reverse->data);
|
|
}
|
|
|
|
int
|
|
stack_gbk(unsigned char c, OtherStatePtr s)
|
|
{
|
|
if(s->gbk.buf < 0) {
|
|
if(c < 129) return c;
|
|
s->gbk.buf = c;
|
|
return -1;
|
|
} else {
|
|
int b;
|
|
if(c < 0x40 || c == 0x7F) {
|
|
s->gbk.buf = -1;
|
|
return c;
|
|
}
|
|
if(s->gbk.buf < 0xFF && c < 0xFF)
|
|
b = (s->gbk.buf << 8) + c;
|
|
else
|
|
b = -1;
|
|
s->gbk.buf = -1;
|
|
return b;
|
|
}
|
|
}
|
|
|
|
int
|
|
init_utf8(OtherStatePtr s)
|
|
{
|
|
s->utf8.buf_ptr = 0;
|
|
return 1;
|
|
}
|
|
|
|
unsigned int
|
|
mapping_utf8(unsigned int n, OtherStatePtr s)
|
|
{
|
|
return n;
|
|
}
|
|
|
|
unsigned int
|
|
reverse_utf8(unsigned int n, OtherStatePtr s)
|
|
{
|
|
if(n < 0x80)
|
|
return n;
|
|
if(n < 0x800)
|
|
return 0xC080 + ((n&0x7C0)<<2) + (n&0x3F);
|
|
if(n < 0x10000)
|
|
return 0xE08080 + ((n&0xF000)<<4) + ((n&0xFC0)<<2) + (n&0x3F);
|
|
return 0xF0808080 + ((n&0x1C0000)<<6) + ((n&0x3F000)<<4) +
|
|
((n&0xFC0)<<2) + (n&0x3F);
|
|
}
|
|
|
|
int
|
|
stack_utf8(unsigned char c, OtherStatePtr s)
|
|
{
|
|
int u;
|
|
|
|
if(c < 0x80) {
|
|
s->utf8.buf_ptr = 0;
|
|
return c;
|
|
}
|
|
if(s->utf8.buf_ptr == 0) {
|
|
if((c & 0x40) == 0) return -1;
|
|
s->utf8.buf[s->utf8.buf_ptr++] = c;
|
|
if((c & 0x60) == 0x40) s->utf8.len = 2;
|
|
else if((c & 0x70) == 0x60) s->utf8.len = 3;
|
|
else if((c & 0x78) == 0x70) s->utf8.len = 4;
|
|
else s->utf8.buf_ptr = 0;
|
|
return -1;
|
|
}
|
|
if((c & 0x40) != 0) {
|
|
s->utf8.buf_ptr = 0;
|
|
return -1;
|
|
}
|
|
s->utf8.buf[s->utf8.buf_ptr++] = c;
|
|
if(s->utf8.buf_ptr < s->utf8.len) return -1;
|
|
switch(s->utf8.len) {
|
|
case 2:
|
|
u = ((s->utf8.buf[0] & 0x1F) << 6) | (s->utf8.buf[1] & 0x3F);
|
|
s->utf8.buf_ptr = 0;
|
|
if(u < 0x80) return -1; else return u;
|
|
case 3:
|
|
u = ((s->utf8.buf[0] & 0x0F) << 12)
|
|
| ((s->utf8.buf[1] & 0x3F) << 6)
|
|
| (s->utf8.buf[2] & 0x3F);
|
|
s->utf8.buf_ptr = 0;
|
|
if(u < 0x800) return -1; else return u;
|
|
case 4:
|
|
u = ((s->utf8.buf[0] & 0x03) << 18)
|
|
| ((s->utf8.buf[1] & 0x3F) << 12)
|
|
| ((s->utf8.buf[2] & 0x3F) << 6)
|
|
| ((s->utf8.buf[3] & 0x3F));
|
|
s->utf8.buf_ptr = 0;
|
|
if(u < 0x10000) return -1; else return u;
|
|
}
|
|
s->utf8.buf_ptr = 0;
|
|
return -1;
|
|
}
|
|
|
|
|
|
/* Constants for the Shift_JIS special cases handled by mapping_sjis and
   reverse_sjis. */

/* Code points at or above this value are sent to the JIS X 0201 reverse
   table by reverse_sjis. */
#define HALFWIDTH_10646 0xFF61

/* Byte 0x5C is mapped to the yen sign, U+00A5. */
#define YEN_SJIS 0x5C
#define YEN_10646 0x00A5

/* Byte 0x7E is mapped to the overline, U+203E. */
#define OVERLINE_SJIS 0x7E
#define OVERLINE_10646 0x203E
|
|
|
|
int
|
|
init_sjis(OtherStatePtr s)
|
|
{
|
|
s->sjis.x0208mapping =
|
|
FontEncMapFind("jisx0208.1990-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
|
|
if(!s->sjis.x0208mapping) return 0;
|
|
|
|
s->sjis.x0208reverse = FontMapReverse(s->sjis.x0208mapping);
|
|
if(!s->sjis.x0208reverse) return 0;
|
|
|
|
s->sjis.x0201mapping =
|
|
FontEncMapFind("jisx0201.1976-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
|
|
if(!s->sjis.x0201mapping) return 0;
|
|
|
|
s->sjis.x0201reverse = FontMapReverse(s->sjis.x0201mapping);
|
|
if(!s->sjis.x0201reverse) return 0;
|
|
|
|
s->sjis.buf = -1;
|
|
return 1;
|
|
}
|
|
|
|
unsigned int
|
|
mapping_sjis(unsigned int n, OtherStatePtr s)
|
|
{
|
|
unsigned int j1, j2, s1, s2;
|
|
if(n == YEN_SJIS) return YEN_10646;
|
|
if(n == OVERLINE_SJIS) return OVERLINE_10646;
|
|
if(n < 0x80) return n;
|
|
if(n >= 0xA0 && n <= 0xDF) return FontEncRecode(n, s->sjis.x0201mapping);
|
|
s1 = ((n>>8)&0xFF);
|
|
s2 = (n&0xFF);
|
|
j1 = (s1 << 1) - (s1 <= 0x9F ? 0xE0 : 0x160) - (s2 < 0x9F ? 1 : 0);
|
|
j2 = s2 - 0x1F - (s2 >= 0x7F ? 1 : 0) - (s2 >= 0x9F ? 0x5E : 0);
|
|
return FontEncRecode((j1<<8) + j2, s->sjis.x0208mapping);
|
|
}
|
|
|
|
unsigned int
|
|
reverse_sjis(unsigned int n, OtherStatePtr s)
|
|
{
|
|
unsigned int j, j1, j2, s1, s2;
|
|
if(n == YEN_10646) return YEN_SJIS;
|
|
if(n == OVERLINE_10646) return OVERLINE_SJIS;
|
|
if(n < 0x80) return n;
|
|
if(n >= HALFWIDTH_10646)
|
|
return s->sjis.x0201reverse->reverse(n, s->sjis.x0201reverse->data);
|
|
j = s->sjis.x0208reverse->reverse(n, s->sjis.x0208reverse->data);
|
|
j1 = ((j>>8)&0xFF);
|
|
j2 = (j&0xFF);
|
|
s1 = ((j1 - 1) >> 1) + ((j1 <= 0x5E) ? 0x71 : 0xB1);
|
|
s2 = j2 + ((j1 & 1) ? ((j2 < 0x60) ? 0x1F : 0x20) : 0x7E);
|
|
return (s1<<8) + s2;
|
|
}
|
|
|
|
int
|
|
stack_sjis(unsigned char c, OtherStatePtr s)
|
|
{
|
|
if(s->sjis.buf < 0) {
|
|
if(c < 128 || (c >= 0xA0 && c <= 0xDF)) return c;
|
|
s->sjis.buf = c;
|
|
return -1;
|
|
} else {
|
|
int b;
|
|
if(c < 0x40 || c == 0x7F) {
|
|
s->sjis.buf = -1;
|
|
return c;
|
|
}
|
|
if(s->sjis.buf < 0xFF && c < 0xFF)
|
|
b = (s->sjis.buf << 8) + c;
|
|
else
|
|
b = -1;
|
|
s->sjis.buf = -1;
|
|
return b;
|
|
}
|
|
}
|
|
|
|
int
|
|
init_hkscs(OtherStatePtr s)
|
|
{
|
|
s->hkscs.mapping =
|
|
FontEncMapFind("big5hkscs-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
|
|
if(!s->hkscs.mapping) return 0;
|
|
|
|
s->hkscs.reverse = FontMapReverse(s->hkscs.mapping);
|
|
if(!s->hkscs.reverse) return 0;
|
|
|
|
s->hkscs.buf = -1;
|
|
return 1;
|
|
}
|
|
|
|
unsigned int
|
|
mapping_hkscs(unsigned int n, OtherStatePtr s)
|
|
{
|
|
unsigned int r;
|
|
if(n < 128) return n;
|
|
if(n == 128) return EURO_10646;
|
|
r = FontEncRecode(n, s->hkscs.mapping);
|
|
return r;
|
|
}
|
|
|
|
unsigned int
|
|
reverse_hkscs(unsigned int n, OtherStatePtr s)
|
|
{
|
|
if(n < 128) return n;
|
|
if(n == EURO_10646) return 128;
|
|
return s->hkscs.reverse->reverse(n, s->hkscs.reverse->data);
|
|
}
|
|
|
|
int
|
|
stack_hkscs(unsigned char c, OtherStatePtr s)
|
|
{
|
|
if(s->hkscs.buf < 0) {
|
|
if(c < 129) return c;
|
|
s->hkscs.buf = c;
|
|
return -1;
|
|
} else {
|
|
int b;
|
|
if(c < 0x40 || c == 0x7F) {
|
|
s->hkscs.buf = -1;
|
|
return c;
|
|
}
|
|
if(s->hkscs.buf < 0xFF && c < 0xFF)
|
|
b = (s->hkscs.buf << 8) + c;
|
|
else
|
|
b = -1;
|
|
s->hkscs.buf = -1;
|
|
return b;
|
|
}
|
|
}
|
|
|
|
|
|
/*
 * GB18030 characters are one, two or four bytes long, so (besides the
 * single-byte Latin range) the character set is split into two subsets:
 *   - the two-byte characters are defined in gb18030.2000-0;
 *   - the four-byte characters are defined in gb18030.2000-1.
 * Please note that the mapping in gb18030.2000-1 is not a mapping from a
 * four-byte sequence to a two-byte value.  To use gb18030.2000-1, the
 * four-byte sequence must first be "linearized" into a single index, and
 * the Unicode value is then looked up with that index.
 *
 * For more information on the GB18030 standard, please see:
 *   http://oss.software.ibm.com/icu/docs/papers/gb18030.html
 *
 * For more information on GB18030 implementation issues in XFree86,
 * please see:
 *   http://www.ibm.com/developerWorks/cn/linux/i18n/gb18030/xfree86/part1
 */
|
|
int
|
|
init_gb18030(OtherStatePtr s)
|
|
{
|
|
s->gb18030.cs0_mapping =
|
|
FontEncMapFind("gb18030.2000-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
|
|
if(!s->gb18030.cs0_mapping) return 0;
|
|
|
|
s->gb18030.cs0_reverse = FontMapReverse(s->gb18030.cs0_mapping);
|
|
if(!s->gb18030.cs0_reverse) return 0;
|
|
|
|
s->gb18030.cs1_mapping =
|
|
FontEncMapFind("gb18030.2000-1", FONT_ENCODING_UNICODE, -1, -1, NULL);
|
|
if(!s->gb18030.cs1_mapping) return 0;
|
|
|
|
s->gb18030.cs1_reverse = FontMapReverse(s->gb18030.cs1_mapping);
|
|
if(!s->gb18030.cs1_reverse) return 0;
|
|
|
|
s->gb18030.linear = 0;
|
|
s->gb18030.buf_ptr = 0;
|
|
return 1;
|
|
}
|
|
|
|
unsigned int
|
|
mapping_gb18030(unsigned int n, OtherStatePtr s)
|
|
{
|
|
if(n <= 0x80) return n; /* 0x80 is valid but unassigned codepoint */
|
|
if(n >= 0xFFFF) return '?';
|
|
|
|
return FontEncRecode(n,
|
|
(s->gb18030.linear)?s->gb18030.cs1_mapping:s->gb18030.cs0_mapping);
|
|
}
|
|
|
|
unsigned int
|
|
reverse_gb18030(unsigned int n, OtherStatePtr s)
|
|
{
|
|
/* when lookup in 2000-0 failed. */
|
|
/* lookup in 2000-1 and then try to unlinear'd */
|
|
unsigned int r;
|
|
if(n <= 0x80) return n;
|
|
|
|
r = s->gb18030.cs0_reverse->reverse(n, s->gb18030.cs0_reverse->data);
|
|
if (r != 0)
|
|
return r;
|
|
|
|
r = s->gb18030.cs1_reverse->reverse(n, s->gb18030.cs1_reverse->data);
|
|
if (r != 0) {
|
|
unsigned char bytes[4];
|
|
|
|
bytes[3] = 0x30 + r % 10; r /= 10;
|
|
bytes[2] = 0x81 + r % 126; r /= 126;
|
|
bytes[1] = 0x30 + r % 10; r /= 10;
|
|
bytes[0] = 0x81 + r;
|
|
|
|
r = (unsigned int)bytes[0] << 24;
|
|
r |= (unsigned int)bytes[1] << 16;
|
|
r |= (unsigned int)bytes[2] << 8;
|
|
r |= (unsigned int)bytes[3];
|
|
}
|
|
return r;
|
|
}
|
|
|
|
int
|
|
stack_gb18030(unsigned char c, OtherStatePtr s)
|
|
{
|
|
/* if set gb18030.linear => True. the return value is "linear'd" */
|
|
if(s->gb18030.buf_ptr == 0) {
|
|
if(c <= 0x80) return c;
|
|
if (c == 0xFF) return -1;
|
|
s->gb18030.linear = 0;
|
|
s->gb18030.buf[s->gb18030.buf_ptr++] = c;
|
|
return -1;
|
|
} else if (s->gb18030.buf_ptr == 1) {
|
|
if (c >= 0x40) {
|
|
s->gb18030.buf_ptr = 0;
|
|
if ((c == 0x80) || (c == 0xFF))
|
|
return -1;
|
|
else
|
|
return (s->gb18030.buf[0] << 8) + c;
|
|
} else if (c >= 30) { /* 2Byte is (0x30 -> 0x39) */
|
|
s->gb18030.buf[s->gb18030.buf_ptr++] = c;
|
|
return -1;
|
|
} else {
|
|
s->gb18030.buf_ptr = 0;
|
|
return c;
|
|
}
|
|
} else if (s->gb18030.buf_ptr == 2) {
|
|
if ((c >= 0x81) && (c <= 0xFE)) {
|
|
s->gb18030.buf[s->gb18030.buf_ptr++] = c;
|
|
return -1;
|
|
} else {
|
|
s->gb18030.buf_ptr = 0;
|
|
return c;
|
|
}
|
|
} else {
|
|
int r = 0;
|
|
s->gb18030.buf_ptr = 0;
|
|
if ((c >= 0x30) && (c <= 0x39)) {
|
|
s->gb18030.linear = 1;
|
|
r = (((s->gb18030.buf[0] - 0x81) * 10
|
|
+ (s->gb18030.buf[1] - 0x30)) * 126
|
|
+ (s->gb18030.buf[2] - 0x81)) * 10
|
|
+ (c - 0x30);
|
|
return r;
|
|
}
|
|
return -1;
|
|
}
|
|
}
|
|
|