154 lines
5.5 KiB
C
154 lines
5.5 KiB
C
/* $XTermId: charclass.c,v 1.14 2006/02/13 01:14:58 tom Exp $ */
|
|
|
|
/*
|
|
* Compact and efficient reimplementation of the
|
|
* xterm character class mechanism for large character sets
|
|
*
|
|
* Markus Kuhn -- mkuhn@acm.org -- 2000-07-03
|
|
*
|
|
* Xterm allows users to select entire words with a double-click on
|
|
* the left mouse button. Opinions might differ on what type of
|
|
* characters are part of separate words, therefore xterm allows users
|
|
* to configure a class code for each 8-bit character. Words are
|
|
* maximum length sequences of neighboring characters with identical
|
|
* class code. Extending this mechanism to Unicode naively would
|
|
* create an at least 2^16 entries (128 kB) long class code table.
|
|
* Instead, we transform the character class table into a list
|
|
* of intervals, that will be accessed via a linear search.
|
|
* Changes made to the table by the user will be appended. A special
|
|
* class code -1 (default) marks characters who have their code number
|
|
* as the class code. We could alternatively use a sorted table of
|
|
* non-overlapping intervals that can be accessed via binary search,
|
|
* but merging in new intervals is significantly more hassle and
|
|
* not worth the effort here.
|
|
*/
|
|
/* $XFree86: xc/programs/xterm/charclass.c,v 1.7 2006/02/13 01:14:58 dickey Exp $ */
|
|
|
|
#include <xterm.h>
|
|
#include <charclass.h>
|
|
|
|
#if OPT_WIDE_CHARS
|
|
|
|
static struct classentry {
|
|
int cclass;
|
|
int first;
|
|
int last;
|
|
} *classtab;
|
|
|
|
/*
|
|
* Special convention for classtab[0]:
|
|
* - classtab[0].cclass is the allocated number of entries in classtab
|
|
* - classtab[0].first = 1 (first used entry in classtab)
|
|
* - classtab[0].last is the last used entry in classtab
|
|
*/
|
|
|
|
int
|
|
SetCharacterClassRange(int low, int high, int value)
|
|
{
|
|
if (high < low)
|
|
return -1; /* nothing to do */
|
|
|
|
/* make sure we have at least one free entry left at table end */
|
|
if (classtab[0].last > classtab[0].cclass - 2) {
|
|
classtab[0].cclass += 5 + classtab[0].cclass / 4;
|
|
classtab = TypeRealloc(struct classentry, classtab[0].cclass, classtab);
|
|
if (!classtab)
|
|
abort();
|
|
}
|
|
|
|
/* simply append new interval to end of interval array */
|
|
classtab[0].last++;
|
|
classtab[classtab[0].last].first = low;
|
|
classtab[classtab[0].last].last = high;
|
|
classtab[classtab[0].last].cclass = value;
|
|
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
init_classtab(void)
|
|
{
|
|
const int size = 50;
|
|
|
|
classtab = TypeMallocN(struct classentry, size);
|
|
if (!classtab)
|
|
abort();
|
|
classtab[0].cclass = size;
|
|
classtab[0].first = 1;
|
|
classtab[0].last = 0;
|
|
|
|
/* old xterm default classes */
|
|
SetCharacterClassRange(0, 0, 32);
|
|
SetCharacterClassRange(1, 31, 1);
|
|
SetCharacterClassRange('\t', '\t', 32);
|
|
SetCharacterClassRange('0', '9', 48);
|
|
SetCharacterClassRange('A', 'Z', 48);
|
|
SetCharacterClassRange('_', '_', 48);
|
|
SetCharacterClassRange('a', 'z', 48);
|
|
SetCharacterClassRange(127, 159, 1);
|
|
SetCharacterClassRange(160, 191, -1);
|
|
SetCharacterClassRange(192, 255, 48);
|
|
SetCharacterClassRange(215, 215, 216);
|
|
SetCharacterClassRange(247, 247, 248);
|
|
|
|
/* added Unicode classes */
|
|
SetCharacterClassRange(0x0100, 0xffdf, 48); /* mostly characters */
|
|
SetCharacterClassRange(0x037e, 0x037e, -1); /* Greek question mark */
|
|
SetCharacterClassRange(0x0387, 0x0387, -1); /* Greek ano teleia */
|
|
SetCharacterClassRange(0x055a, 0x055f, -1); /* Armenian punctuation */
|
|
SetCharacterClassRange(0x0589, 0x0589, -1); /* Armenian full stop */
|
|
SetCharacterClassRange(0x0700, 0x070d, -1); /* Syriac punctuation */
|
|
SetCharacterClassRange(0x104a, 0x104f, -1); /* Myanmar punctuation */
|
|
SetCharacterClassRange(0x10fb, 0x10fb, -1); /* Georgian punctuation */
|
|
SetCharacterClassRange(0x1361, 0x1368, -1); /* Ethiopic punctuation */
|
|
SetCharacterClassRange(0x166d, 0x166e, -1); /* Canadian Syl. punctuation */
|
|
SetCharacterClassRange(0x17d4, 0x17dc, -1); /* Khmer punctuation */
|
|
SetCharacterClassRange(0x1800, 0x180a, -1); /* Mongolian punctuation */
|
|
SetCharacterClassRange(0x2000, 0x200a, 32); /* spaces */
|
|
SetCharacterClassRange(0x200b, 0x27ff, -1); /* punctuation and symbols */
|
|
SetCharacterClassRange(0x2070, 0x207f, 0x2070); /* superscript */
|
|
SetCharacterClassRange(0x2080, 0x208f, 0x2080); /* subscript */
|
|
SetCharacterClassRange(0x3000, 0x3000, 32); /* ideographic space */
|
|
SetCharacterClassRange(0x3001, 0x3020, -1); /* ideographic punctuation */
|
|
SetCharacterClassRange(0x3040, 0x309f, 0x3040); /* Hiragana */
|
|
SetCharacterClassRange(0x30a0, 0x30ff, 0x30a0); /* Katakana */
|
|
SetCharacterClassRange(0x3300, 0x9fff, 0x4e00); /* CJK Ideographs */
|
|
SetCharacterClassRange(0xac00, 0xd7a3, 0xac00); /* Hangul Syllables */
|
|
SetCharacterClassRange(0xf900, 0xfaff, 0x4e00); /* CJK Ideographs */
|
|
SetCharacterClassRange(0xfe30, 0xfe6b, -1); /* punctuation forms */
|
|
SetCharacterClassRange(0xff00, 0xff0f, -1); /* half/fullwidth ASCII */
|
|
SetCharacterClassRange(0xff1a, 0xff20, -1); /* half/fullwidth ASCII */
|
|
SetCharacterClassRange(0xff3b, 0xff40, -1); /* half/fullwidth ASCII */
|
|
SetCharacterClassRange(0xff5b, 0xff64, -1); /* half/fullwidth ASCII */
|
|
|
|
return;
|
|
}
|
|
|
|
int
|
|
CharacterClass(int c)
|
|
{
|
|
int i, cclass = -1;
|
|
|
|
for (i = classtab[0].first; i <= classtab[0].last; i++)
|
|
if (classtab[i].first <= c && classtab[i].last >= c)
|
|
cclass = classtab[i].cclass;
|
|
|
|
if (cclass < 0)
|
|
cclass = c;
|
|
|
|
return cclass;
|
|
}
|
|
|
|
#ifdef NO_LEAKS
|
|
void
|
|
noleaks_CharacterClass(void)
|
|
{
|
|
if (classtab != 0) {
|
|
free(classtab);
|
|
classtab = 0;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#endif /* OPT_WIDE_CHARS */
|