mirror of
https://github.com/golang/go
synced 2024-11-25 05:07:56 -07:00
lib9: update to Unicode 6.0.0
R=r CC=golang-dev https://golang.org/cl/4121042
This commit is contained in:
parent
ad00644434
commit
176d5769d9
@ -14,3 +14,19 @@ OFILES=\
|
||||
mkrunetype.$O\
|
||||
|
||||
include ../../Make.ccmd
|
||||
|
||||
# Fetch the UnicodeData.txt release named by the pattern stem (e.g. 6.0.0).
# Output goes to a temporary "_" name and is renamed only after curl
# succeeds, so a failed transfer never leaves a bad target behind.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as though it were the data file.
UnicodeData-%.txt:
	curl --fail http://www.unicode.org/Public/$*/ucd/UnicodeData.txt >_$@
	mv _$@ $@

# Produce runetypebody-<version>.c by running mkrunetype -p on the matching
# data file; the same write-then-rename pattern protects the target.
runetypebody-%.c: mkrunetype UnicodeData-%.txt
	mkrunetype -p UnicodeData-$*.txt >_$@
	mv _$@ $@

# Defined before CLEANFILES so the value is available even if CLEANFILES
# turns out to be a simply-expanded variable in the included makefile.
UNICODE_VERSION=6.0.0

# The rules above create UnicodeData-<version>.txt (and a "_"-prefixed
# temporary while downloading); list those names so clean removes them.
CLEANFILES+=UnicodeData-$(UNICODE_VERSION).txt _UnicodeData-$(UNICODE_VERSION).txt

# Run mkrunetype -c against the data file for the current Unicode version.
test: mkrunetype UnicodeData-$(UNICODE_VERSION).txt
	mkrunetype -c UnicodeData-$(UNICODE_VERSION).txt
|
||||
|
||||
|
@ -93,8 +93,9 @@ usage(void)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[]){
|
||||
void
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
FILE *in;
|
||||
char buf[MAX_LINE], buf2[MAX_LINE];
|
||||
char *fields[NFIELDS + 1], *fields2[NFIELDS + 1];
|
||||
@ -239,7 +240,7 @@ main(int argc, char *argv[]){
|
||||
}else{
|
||||
mktables(argv[0], usepairs);
|
||||
}
|
||||
return 0;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -562,7 +563,8 @@ mkto(const char* label, int* map, int usepairs)
|
||||
|
||||
// Make only range tables and a function for is<label>rune.
|
||||
static void
|
||||
mkisronly(const char* label, char* prop) {
|
||||
mkisronly(const char* label, char* prop)
|
||||
{
|
||||
mkisrange(label, prop, 1);
|
||||
printf(
|
||||
"int\n"
|
||||
|
@ -35,49 +35,4 @@ rbsearch(Rune c, Rune *t, int n, int ne)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The "ideographic" property is hard to extract from UnicodeData.txt,
|
||||
* so it is hard coded here.
|
||||
*
|
||||
* It is defined in the Unicode PropList.txt file, for example
|
||||
* PropList-3.0.0.txt. Unlike the UnicodeData.txt file, the format of
|
||||
* PropList changes between versions. This property appears relatively static;
|
||||
* it is the same in version 4.0.1, except that version defines some >16 bit
|
||||
* chars as ideographic as well: 20000..2a6d6, and 2f800..2Fa1d.
|
||||
*/
|
||||
static Rune __isideographicr[] = {
|
||||
0x3006, 0x3007, /* 0x3006 added in 2.0.14 */
|
||||
0x3021, 0x3029,
|
||||
0x3038, 0x303a, /* added in 3.0.0 */
|
||||
0x3400, 0x4db5, /* added in 3.0.0 */
|
||||
|
||||
/* consecutive */
|
||||
0x4e00, 0x9fa5,
|
||||
0x9fa6, 0x9fbb, /* added in 4.1.0 */
|
||||
0x9fbc, 0x9fc3, /* added in 5.1.0 */
|
||||
0x9fc4, 0x9fcb, /* added in 5.2.0 */
|
||||
|
||||
0xf900, 0xfa2d,
|
||||
|
||||
/* consecutive */
|
||||
0xfa30, 0xfa6a, /* added in 5.1.0 */
|
||||
0xfa6b, 0xfa6d, /* added in 5.2.0 */
|
||||
|
||||
0xfa70, 0xfad9, /* added in 4.1.0 */
|
||||
0x20000, 0x2a6d6, /* added in 3.1.0 */
|
||||
0x2a700, 0x2b734, /* added in 5.2.0 */
|
||||
0x2f800, 0x2fa1d, /* added in 3.1.0 */
|
||||
};
|
||||
|
||||
int
|
||||
isideographicrune(Rune c)
|
||||
{
|
||||
Rune *p;
|
||||
|
||||
p = rbsearch(c, __isideographicr, nelem(__isideographicr)/2, 2);
|
||||
if(p && c >= p[0] && c <= p[1])
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "runetypebody-5.2.0.c"
|
||||
#include "runetypebody-6.0.0.c"
|
||||
|
1565
src/lib9/utf/runetypebody-6.0.0.c
Normal file
1565
src/lib9/utf/runetypebody-6.0.0.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -224,12 +224,6 @@ int isalpharune(Rune r);
|
||||
int isdigitrune(Rune r);
|
||||
|
||||
|
||||
// isideographicrune tests for ideographic characters and numbers, as
|
||||
// defined by the Unicode standard.
|
||||
|
||||
int isideographicrune(Rune r);
|
||||
|
||||
|
||||
// isspacerune tests for whitespace characters, including "C" locale
|
||||
// whitespace, Unicode defined whitespace, and the "zero-width
|
||||
// non-break space" character.
|
||||
|
Loading…
Reference in New Issue
Block a user