1
0
mirror of https://github.com/golang/go synced 2024-11-23 04:10:04 -07:00

unicode: improve generated comments for categories

The comments on the category range tables in the unicode package are fairly
redundant and require an external source to translate them into human-readable
category names.

This adds a lookup table of category descriptions and, when a description is
available for a category, uses it when generating the comments for the range tables.

Fixes #28954

Change-Id: I853e2d270def6492c2c1dd2ad0ec761a74c04e5d
Reviewed-on: https://go-review.googlesource.com/c/151297
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
Wil Selwood 2018-11-26 15:11:45 +00:00 committed by Brad Fitzpatrick
parent 6bf531384d
commit d704b5c956
2 changed files with 72 additions and 32 deletions

View File

@ -458,6 +458,39 @@ package unicode
`
// categoryMapping maps a two-letter Unicode general category abbreviation
// (such as "Lu") to its human-readable description (such as "Letter,
// uppercase"). printCategories looks up each multi-letter category name here
// and, when an entry exists, appends the description in parentheses to the
// generated comment for that category's variable; names without an entry
// keep the plain comment.
var categoryMapping = map[string]string{
"Lu": "Letter, uppercase",
"Ll": "Letter, lowercase",
"Lt": "Letter, titlecase",
"Lm": "Letter, modifier",
"Lo": "Letter, other",
"Mn": "Mark, nonspacing",
"Mc": "Mark, spacing combining",
"Me": "Mark, enclosing",
"Nd": "Number, decimal digit",
"Nl": "Number, letter",
"No": "Number, other",
"Pc": "Punctuation, connector",
"Pd": "Punctuation, dash",
"Ps": "Punctuation, open",
"Pe": "Punctuation, close",
"Pi": "Punctuation, initial quote",
"Pf": "Punctuation, final quote",
"Po": "Punctuation, other",
"Sm": "Symbol, math",
"Sc": "Symbol, currency",
"Sk": "Symbol, modifier",
"So": "Symbol, other",
"Zs": "Separator, space",
"Zl": "Separator, line",
"Zp": "Separator, paragraph",
"Cc": "Other, control",
"Cf": "Other, format",
"Cs": "Other, surrogate",
"Co": "Other, private use",
"Cn": "Other, not assigned",
}
func printCategories() {
if *tablelist == "" {
return
@ -528,10 +561,17 @@ func printCategories() {
varDecl = "\tTitle = _Lt; // Title is the set of Unicode title case letters.\n"
}
if len(name) > 1 {
desc, ok := categoryMapping[name]
if ok {
varDecl += fmt.Sprintf(
"\t%s = _%s; // %s is the set of Unicode characters in category %s (%s).\n",
name, name, name, name, desc)
} else {
varDecl += fmt.Sprintf(
"\t%s = _%s; // %s is the set of Unicode characters in category %s.\n",
name, name, name, name)
}
}
decl[ndecl] = varDecl
ndecl++
if len(name) == 1 { // unified categories

View File

@ -3380,53 +3380,53 @@ var _Zs = &RangeTable{
// These variables have type *RangeTable.
var (
Cc = _Cc // Cc is the set of Unicode characters in category Cc.
Cf = _Cf // Cf is the set of Unicode characters in category Cf.
Co = _Co // Co is the set of Unicode characters in category Co.
Cs = _Cs // Cs is the set of Unicode characters in category Cs.
Cc = _Cc // Cc is the set of Unicode characters in category Cc (Other, control).
Cf = _Cf // Cf is the set of Unicode characters in category Cf (Other, format).
Co = _Co // Co is the set of Unicode characters in category Co (Other, private use).
Cs = _Cs // Cs is the set of Unicode characters in category Cs (Other, surrogate).
Digit = _Nd // Digit is the set of Unicode characters with the "decimal digit" property.
Nd = _Nd // Nd is the set of Unicode characters in category Nd.
Nd = _Nd // Nd is the set of Unicode characters in category Nd (Number, decimal digit).
Letter = _L // Letter/L is the set of Unicode letters, category L.
L = _L
Lm = _Lm // Lm is the set of Unicode characters in category Lm.
Lo = _Lo // Lo is the set of Unicode characters in category Lo.
Lm = _Lm // Lm is the set of Unicode characters in category Lm (Letter, modifier).
Lo = _Lo // Lo is the set of Unicode characters in category Lo (Letter, other).
Lower = _Ll // Lower is the set of Unicode lower case letters.
Ll = _Ll // Ll is the set of Unicode characters in category Ll.
Ll = _Ll // Ll is the set of Unicode characters in category Ll (Letter, lowercase).
Mark = _M // Mark/M is the set of Unicode mark characters, category M.
M = _M
Mc = _Mc // Mc is the set of Unicode characters in category Mc.
Me = _Me // Me is the set of Unicode characters in category Me.
Mn = _Mn // Mn is the set of Unicode characters in category Mn.
Nl = _Nl // Nl is the set of Unicode characters in category Nl.
No = _No // No is the set of Unicode characters in category No.
Mc = _Mc // Mc is the set of Unicode characters in category Mc (Mark, spacing combining).
Me = _Me // Me is the set of Unicode characters in category Me (Mark, enclosing).
Mn = _Mn // Mn is the set of Unicode characters in category Mn (Mark, nonspacing).
Nl = _Nl // Nl is the set of Unicode characters in category Nl (Number, letter).
No = _No // No is the set of Unicode characters in category No (Number, other).
Number = _N // Number/N is the set of Unicode number characters, category N.
N = _N
Other = _C // Other/C is the set of Unicode control and special characters, category C.
C = _C
Pc = _Pc // Pc is the set of Unicode characters in category Pc.
Pd = _Pd // Pd is the set of Unicode characters in category Pd.
Pe = _Pe // Pe is the set of Unicode characters in category Pe.
Pf = _Pf // Pf is the set of Unicode characters in category Pf.
Pi = _Pi // Pi is the set of Unicode characters in category Pi.
Po = _Po // Po is the set of Unicode characters in category Po.
Ps = _Ps // Ps is the set of Unicode characters in category Ps.
Pc = _Pc // Pc is the set of Unicode characters in category Pc (Punctuation, connector).
Pd = _Pd // Pd is the set of Unicode characters in category Pd (Punctuation, dash).
Pe = _Pe // Pe is the set of Unicode characters in category Pe (Punctuation, close).
Pf = _Pf // Pf is the set of Unicode characters in category Pf (Punctuation, final quote).
Pi = _Pi // Pi is the set of Unicode characters in category Pi (Punctuation, initial quote).
Po = _Po // Po is the set of Unicode characters in category Po (Punctuation, other).
Ps = _Ps // Ps is the set of Unicode characters in category Ps (Punctuation, open).
Punct = _P // Punct/P is the set of Unicode punctuation characters, category P.
P = _P
Sc = _Sc // Sc is the set of Unicode characters in category Sc.
Sk = _Sk // Sk is the set of Unicode characters in category Sk.
Sm = _Sm // Sm is the set of Unicode characters in category Sm.
So = _So // So is the set of Unicode characters in category So.
Sc = _Sc // Sc is the set of Unicode characters in category Sc (Symbol, currency).
Sk = _Sk // Sk is the set of Unicode characters in category Sk (Symbol, modifier).
Sm = _Sm // Sm is the set of Unicode characters in category Sm (Symbol, math).
So = _So // So is the set of Unicode characters in category So (Symbol, other).
Space = _Z // Space/Z is the set of Unicode space characters, category Z.
Z = _Z
Symbol = _S // Symbol/S is the set of Unicode symbol characters, category S.
S = _S
Title = _Lt // Title is the set of Unicode title case letters.
Lt = _Lt // Lt is the set of Unicode characters in category Lt.
Lt = _Lt // Lt is the set of Unicode characters in category Lt (Letter, titlecase).
Upper = _Lu // Upper is the set of Unicode upper case letters.
Lu = _Lu // Lu is the set of Unicode characters in category Lu.
Zl = _Zl // Zl is the set of Unicode characters in category Zl.
Zp = _Zp // Zp is the set of Unicode characters in category Zp.
Zs = _Zs // Zs is the set of Unicode characters in category Zs.
Lu = _Lu // Lu is the set of Unicode characters in category Lu (Letter, uppercase).
Zl = _Zl // Zl is the set of Unicode characters in category Zl (Separator, line).
Zp = _Zp // Zp is the set of Unicode characters in category Zp (Separator, paragraph).
Zs = _Zs // Zs is the set of Unicode characters in category Zs (Separator, space).
)
// Generated by running