From 5b904a3bdee22529d4f6c42aa83c98c5d895eadb Mon Sep 17 00:00:00 2001 From: Rob Pike Date: Mon, 7 Jul 2008 14:07:46 -0700 Subject: [PATCH] update to Unicode 5 SVN=126184 --- src/lib/math/asin.go | 4 +- src/lib/math/exp.go | 2 +- src/lib/math/log.go | 4 +- src/lib/math/main.go | 20 +- src/lib/math/pow.go | 4 +- src/lib/math/sinh.go | 4 +- src/lib/math/sqrt.go | 3 +- src/lib/math/tan.go | 2 +- src/lib9/utf/mkrunetype.c | 733 ++++++++++++++++++++++++ src/lib9/utf/rune.c | 230 +++++++- src/lib9/utf/runetype.c | 1139 +------------------------------------ src/lib9/utf/utf.h | 248 ++++++++ src/lib9/utf/utfdef.h | 35 +- src/lib9/utf/utfecpy.c | 9 +- src/lib9/utf/utflen.c | 9 +- src/lib9/utf/utfnlen.c | 10 +- src/lib9/utf/utfrrune.c | 12 +- src/lib9/utf/utfrune.c | 10 +- src/lib9/utf/utfutf.c | 13 +- src/runtime/Makefile | 1 + src/runtime/rune.c | 224 ++++++++ src/runtime/runtime.h | 2 + src/runtime/string.c | 49 -- test/string_lit.go | 9 + 24 files changed, 1520 insertions(+), 1256 deletions(-) create mode 100644 src/lib9/utf/mkrunetype.c create mode 100644 src/lib9/utf/utf.h create mode 100644 src/runtime/rune.c diff --git a/src/lib/math/asin.go b/src/lib/math/asin.go index 9a83e9e435d..a0135f48fdb 100644 --- a/src/lib/math/asin.go +++ b/src/lib/math/asin.go @@ -34,7 +34,7 @@ asin(arg double)double sign = true; } if arg > 1 { - panic "return sys.NaN()"; + return sys.NaN(); } temp = sqrt(1 - x*x); @@ -54,7 +54,7 @@ func acos(arg double)double { if(arg > 1 || arg < -1) { - panic "return sys.NaN()"; + return sys.NaN(); } return pio2 - asin(arg); } diff --git a/src/lib/math/exp.go b/src/lib/math/exp.go index 6be61afdf3a..b428273e5ff 100644 --- a/src/lib/math/exp.go +++ b/src/lib/math/exp.go @@ -40,7 +40,7 @@ exp(arg double) double return 0.; } if arg > maxf { - panic "return sys.Inf(1)" + return sys.Inf(1) } x = arg*log2e; diff --git a/src/lib/math/log.go b/src/lib/math/log.go index 1c44eb8a3bd..7ad809cb08d 100644 --- a/src/lib/math/log.go +++ b/src/lib/math/log.go @@ -36,7 
+36,7 @@ log(arg double) double var exp int; if arg <= 0 { - panic "return sys.NaN()"; + return sys.NaN(); } exp,x = sys.frexp(arg); @@ -63,7 +63,7 @@ log10(arg double) double { if arg <= 0 { - panic "return sys.NaN()"; + return sys.NaN(); } return log(arg) * ln10o1; } diff --git a/src/lib/math/main.go b/src/lib/math/main.go index 2fa7ea152f2..0006151d9ff 100644 --- a/src/lib/math/main.go +++ b/src/lib/math/main.go @@ -5,7 +5,25 @@ package main -import math "math" +//import math "math" +////////////////// + import math "asin" + import math "atan" + import math "atan2" + import math "exp" + import math "fabs" + import math "floor" + import math "fmod" + import math "hypot" + import math "log" + import math "pow" + import math "pow10" + import math "sin" + import math "sinh" + import math "sqrt" + import math "tan" + import math "tanh" + const ( diff --git a/src/lib/math/pow.go b/src/lib/math/pow.go index dba41efdcb6..958bb371c14 100644 --- a/src/lib/math/pow.go +++ b/src/lib/math/pow.go @@ -26,14 +26,14 @@ pow(arg1,arg2 double) double if arg1 <= 0 { if(arg1 == 0) { if arg2 <= 0 { - panic "return sys.NaN()"; + return sys.NaN(); } return 0; } temp = floor(arg2); if temp != arg2 { - panic "return sys.NaN()"; + panic sys.NaN(); } l = long(temp); diff --git a/src/lib/math/sinh.go b/src/lib/math/sinh.go index 75f6ddd6310..a475171d7ab 100644 --- a/src/lib/math/sinh.go +++ b/src/lib/math/sinh.go @@ -48,7 +48,7 @@ sinh(arg double) double temp = exp(arg)/2; case arg > 0.5: -// temp = (exp(arg) - exp(-arg))/2; + temp = (exp(arg) - exp(-arg))/2; default: argsq = arg*arg; @@ -71,5 +71,5 @@ cosh(arg double) double if arg > 21 { return exp(arg)/2; } -// return (exp(arg) + exp(-arg))/2; + return (exp(arg) + exp(-arg))/2; } diff --git a/src/lib/math/sqrt.go b/src/lib/math/sqrt.go index c5c01584fe3..c1a9e862247 100644 --- a/src/lib/math/sqrt.go +++ b/src/lib/math/sqrt.go @@ -19,11 +19,10 @@ sqrt(arg double) double var x, temp double; var exp, i int; -/* BUG: NO isINF if 
sys.isInf(arg, 1) { return arg; } -*/ + if arg <= 0 { if arg < 0 { panic "return sys.NaN()" diff --git a/src/lib/math/tan.go b/src/lib/math/tan.go index 695352ae5e3..11c03009f2c 100644 --- a/src/lib/math/tan.go +++ b/src/lib/math/tan.go @@ -62,7 +62,7 @@ tan(arg double) double if flag { if(temp == 0) { - panic "return sys.NaN()"; + panic sys.NaN(); } temp = 1/temp; } diff --git a/src/lib9/utf/mkrunetype.c b/src/lib9/utf/mkrunetype.c new file mode 100644 index 00000000000..f1a9f8a77a9 --- /dev/null +++ b/src/lib9/utf/mkrunetype.c @@ -0,0 +1,733 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + * make is(upper|lower|title|space|alpha)rune and + * to(upper|lower|title)rune from a UnicodeData.txt file. + * these can be found at unicode.org + * + * with -c, runs a check of the existing runetype functions vs. + * those extracted from UnicodeData. + * + * with -p, generates tables for pairs of chars, as well as for ranges + * and singletons. + * + * UnicodeData defines 4 fields of interest: + * 1) a category + * 2) an upper case mapping + * 3) a lower case mapping + * 4) a title case mapping + * + * toupper, tolower, and totitle are defined directly from the mapping. + * + * isalpharune(c) is true iff c is a "letter" category + * isupperrune(c) is true iff c is the target of toupperrune, + * or is in the uppercase letter category + * similarly for islowerrune and istitlerune. + * isspacerune is true for space category chars, "C" locale white space chars, + * and two additions: + * 0085 "next line" control char + * feff] "zero-width non-break space" + * isdigitrune is true iff c is a numeric-digit category. 
+ */ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <libgen.h> +#include "utf.h" +#include "utfdef.h" + +enum { + /* + * fields in the unicode data file + */ + FIELD_CODE, + FIELD_NAME, + FIELD_CATEGORY, + FIELD_COMBINING, + FIELD_BIDIR, + FIELD_DECOMP, + FIELD_DECIMAL_DIG, + FIELD_DIG, + FIELD_NUMERIC_VAL, + FIELD_MIRRORED, + FIELD_UNICODE_1_NAME, + FIELD_COMMENT, + FIELD_UPPER, + FIELD_LOWER, + FIELD_TITLE, + NFIELDS, + + MAX_LINE = 1024, + + TO_OFFSET = 1 << 20, + + NRUNES = 1 << 21, +}; + +#define TO_DELTA(xmapped,x) (TO_OFFSET + (xmapped) - (x)) + +static char myisspace[NRUNES]; +static char myisalpha[NRUNES]; +static char myisdigit[NRUNES]; +static char myisupper[NRUNES]; +static char myislower[NRUNES]; +static char myistitle[NRUNES]; + +static int mytoupper[NRUNES]; +static int mytolower[NRUNES]; +static int mytotitle[NRUNES]; + +static void check(void); +static void mktables(char *src, int usepairs); +static void fatal(const char *fmt, ...); +static int mygetfields(char **fields, int nfields, char *str, const char *delim); +static int getunicodeline(FILE *in, char **fields, char *buf); +static int getcode(char *s); + +static void +usage(void) +{ + fprintf(stderr, "usage: mktables [-cp] <UnicodeData.txt>\n"); + exit(1); +} + +int +main(int argc, char *argv[]){ + FILE *in; + char buf[MAX_LINE], buf2[MAX_LINE]; + char *fields[NFIELDS + 1], *fields2[NFIELDS + 1]; + char *p; + int i, code, last, docheck, usepairs; + + docheck = 0; + usepairs = 0; + ARGBEGIN{ + case 'c': + docheck = 1; + break; + case 'p': + usepairs = 1; + break; + default: + usage(); + }ARGEND + + if(argc != 1){ + usage(); + } + + in = fopen(argv[0], "r"); + if(in == NULL){ + fatal("can't open %s", argv[0]); + } + + for(i = 0; i < NRUNES; i++){ + mytoupper[i] = i; + mytolower[i] = i; + mytotitle[i] = i; + } + + /* + * make sure isspace has all of the "C" locale whitespace chars + */ + myisspace['\t'] = 1; + myisspace['\n'] = 1; + myisspace['\r'] = 1; + myisspace['\f'] = 1; + myisspace['\v'] = 1; + + /* + * a couple 
of other exceptions + */ + myisspace[0x85] = 1; /* control char, "next line" */ + myisspace[0xfeff] = 1; /* zero-width non-break space */ + + last = -1; + while(getunicodeline(in, fields, buf)){ + code = getcode(fields[FIELD_CODE]); + if (code >= NRUNES) + fatal("code-point value too big: %x", code); + if(code <= last) + fatal("bad code sequence: %x then %x", last, code); + last = code; + + /* + * check for ranges + */ + p = fields[FIELD_CATEGORY]; + if(strstr(fields[FIELD_NAME], ", First>") != NULL){ + if(!getunicodeline(in, fields2, buf2)) + fatal("range start at eof"); + if (strstr(fields2[FIELD_NAME], ", Last>") == NULL) + fatal("range start not followed by range end"); + last = getcode(fields2[FIELD_CODE]); + if(last <= code) + fatal("range out of sequence: %x then %x", code, last); + if(strcmp(p, fields2[FIELD_CATEGORY]) != 0) + fatal("range with mismatched category"); + } + + /* + * set properties and conversions + */ + for (; code <= last; code++){ + if(p[0] == 'L') + myisalpha[code] = 1; + if(p[0] == 'Z') + myisspace[code] = 1; + + if(strcmp(p, "Lu") == 0) + myisupper[code] = 1; + if(strcmp(p, "Ll") == 0) + myislower[code] = 1; + + if(strcmp(p, "Lt") == 0) + myistitle[code] = 1; + + if(strcmp(p, "Nd") == 0) + myisdigit[code] = 1; + + /* + * when finding conversions, also need to mark + * upper/lower case, since some chars, like + * "III" (0x2162), aren't defined as letters but have a + * lower case mapping ("iii" (0x2172)). + */ + if(fields[FIELD_UPPER][0] != '\0'){ + mytoupper[code] = getcode(fields[FIELD_UPPER]); + } + if(fields[FIELD_LOWER][0] != '\0'){ + mytolower[code] = getcode(fields[FIELD_LOWER]); + } + if(fields[FIELD_TITLE][0] != '\0'){ + mytotitle[code] = getcode(fields[FIELD_TITLE]); + } + } + } + + fclose(in); + + /* + * check for codes with no totitle mapping but a toupper mapping. + * these appear in UnicodeData-2.0.14.txt, but are almost certainly + * erroneous. 
+ */ + for(i = 0; i < NRUNES; i++){ + if(mytotitle[i] == i + && mytoupper[i] != i + && !myistitle[i]) + fprintf(stderr, "warning: code=%.4x not istitle, totitle is same, toupper=%.4x\n", i, mytoupper[i]); + } + + /* + * make sure isupper[c] is true if for some x toupper[x] == c + * ditto for islower and istitle + */ + for(i = 0; i < NRUNES; i++) { + if(mytoupper[i] != i) + myisupper[mytoupper[i]] = 1; + if(mytolower[i] != i) + myislower[mytolower[i]] = 1; + if(mytotitle[i] != i) + myistitle[mytotitle[i]] = 1; + } + + if(docheck){ + check(); + }else{ + mktables(argv[0], usepairs); + } + return 0; +} + +/* + * generate a properties array for ranges, clearing those cases covered. + * if force, generate one-entry ranges for singletons. + */ +static int +mkisrange(const char* label, char* prop, int force) +{ + int start, stop, some; + + /* + * first, the ranges + */ + some = 0; + for(start = 0; start < NRUNES; ) { + if(!prop[start]){ + start++; + continue; + } + + for(stop = start + 1; stop < NRUNES; stop++){ + if(!prop[stop]){ + break; + } + prop[stop] = 0; + } + if(force || stop != start + 1){ + if(!some){ + printf("static Rune __is%sr[] = {\n", label); + some = 1; + } + prop[start] = 0; + printf("\t0x%.4x, 0x%.4x,\n", start, stop - 1); + } + + start = stop; + } + if(some) + printf("};\n\n"); + return some; +} + +/* + * generate a mapping array for pairs with a skip between, + * clearing those entries covered. 
+ */ +static int +mkispair(const char *label, char *prop) +{ + int start, stop, some; + + some = 0; + for(start = 0; start + 2 < NRUNES; ) { + if(!prop[start]){ + start++; + continue; + } + + for(stop = start + 2; stop < NRUNES; stop += 2){ + if(!prop[stop]){ + break; + } + prop[stop] = 0; + } + if(stop != start + 2){ + if(!some){ + printf("static Rune __is%sp[] = {\n", label); + some = 1; + } + prop[start] = 0; + printf("\t0x%.4x, 0x%.4x,\n", start, stop - 2); + } + + start = stop; + } + if(some) + printf("};\n\n"); + return some; +} + +/* + * generate a properties array for singletons, clearing those cases covered. + */ +static int +mkissingle(const char *label, char *prop) +{ + int start, some; + + some = 0; + for(start = 0; start < NRUNES; start++) { + if(!prop[start]){ + continue; + } + + if(!some){ + printf("static Rune __is%ss[] = {\n", label); + some = 1; + } + prop[start] = 0; + printf("\t0x%.4x,\n", start); + } + if(some) + printf("};\n\n"); + return some; +} + +/* + * generate tables and a function for is