1
0
mirror of https://github.com/golang/go synced 2024-11-17 18:54:42 -07:00

go/token: add IsIdentifier, IsKeyword, and IsExported

Telling whether a string is a valid Go identifier can seem like an easy
task, but it's easy to forget about the edge cases. For example, some
implementations out there forget that an empty string or keywords like
"func" aren't valid identifiers.

Add a simple implementation with proper Unicode support, and start using
it in cmd/cover and cmd/doc. Other pieces of the standard library
reimplement part of this logic, but don't use a "func(string) bool"
signature, so we're leaving them untouched for now.

Add some tests too, to ensure that we actually handle these edge cases
correctly.

Since telling whether a string is a valid identifier requires knowing
that it's not a valid keyword, add IsKeyword too. The internal map was
already accessible via Lookup, but "Lookup(str) != IDENT" isn't as easy
to understand as IsKeyword(str). And, as per Josh's suggestion, we could
have IsKeyword (and probably Lookup too) use a perfect hash function
instead of a global map.

Finally, for consistency with these new functions, add IsExported. That
makes go/ast.IsExported a bit redundant, so perhaps it can be deprecated
in favor of go/token.IsExported in the future. Clarify that
token.IsExported doesn't imply token.IsIdentifier, to avoid ambiguity.

Fixes #30064.

Change-Id: I0e0e49215fd7e47b603ebc2b5a44086c51ba57f7
Reviewed-on: https://go-review.googlesource.com/c/go/+/169018
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
Reviewed-by: Alan Donovan <adonovan@google.com>
This commit is contained in:
Daniel Martí 2019-03-23 16:20:35 +00:00 committed by Robert Griesemer
parent a01d108e30
commit 60a8dbf3b4
5 changed files with 78 additions and 46 deletions

View File

@ -16,7 +16,6 @@ import (
"log" "log"
"os" "os"
"sort" "sort"
"unicode"
"cmd/internal/edit" "cmd/internal/edit"
"cmd/internal/objabi" "cmd/internal/objabi"
@ -117,7 +116,7 @@ func parseFlags() error {
return fmt.Errorf("too many options") return fmt.Errorf("too many options")
} }
if *varVar != "" && !isValidIdentifier(*varVar) { if *varVar != "" && !token.IsIdentifier(*varVar) {
return fmt.Errorf("-var: %q is not a valid identifier", *varVar) return fmt.Errorf("-var: %q is not a valid identifier", *varVar)
} }
@ -685,22 +684,6 @@ func (f *File) addVariables(w io.Writer) {
} }
} }
// isValidIdentifier reports whether ident is a valid Go identifier: a
// non-empty string made up of Unicode letters, digits, and underscores
// whose first character is not a digit, and which is not a Go keyword
// such as "func" or "return".
func isValidIdentifier(ident string) bool {
	// Keywords are spelled like identifiers but are reserved by the
	// language, so they must be rejected explicitly.
	if ident == "" || token.IsKeyword(ident) {
		return false
	}
	for i, c := range ident {
		switch {
		case c == '_' || unicode.IsLetter(c):
			// Letters and underscores are allowed in any position.
		case i > 0 && unicode.IsDigit(c):
			// Digits are allowed everywhere except the first position.
		default:
			return false
		}
	}
	return true
}
// It is possible for positions to repeat when there is a line // It is possible for positions to repeat when there is a line
// directive that does not specify column information and the input // directive that does not specify column information and the input
// has not been passed through gofmt. // has not been passed through gofmt.

View File

@ -42,6 +42,7 @@ import (
"flag" "flag"
"fmt" "fmt"
"go/build" "go/build"
"go/token"
"io" "io"
"log" "log"
"os" "os"
@ -333,30 +334,20 @@ func parseSymbol(str string) (symbol, method string) {
case 1: case 1:
case 2: case 2:
method = elem[1] method = elem[1]
isIdentifier(method) if !token.IsIdentifier(method) {
log.Fatalf("invalid identifier %q", method)
}
default: default:
log.Printf("too many periods in symbol specification") log.Printf("too many periods in symbol specification")
usage() usage()
} }
symbol = elem[0] symbol = elem[0]
isIdentifier(symbol) if !token.IsIdentifier(symbol) {
log.Fatalf("invalid identifier %q", symbol)
}
return return
} }
// isIdentifier checks that the name is a valid Go identifier, and
// logs and exits if it is not. A valid identifier is a non-empty
// string of Unicode letters, digits, and underscores that does not
// start with a digit and is not a Go keyword.
func isIdentifier(name string) {
	if len(name) == 0 {
		log.Fatal("empty symbol")
	}
	// Keywords such as "func" are spelled like identifiers but are
	// reserved by the language, so they can never name a symbol.
	if token.IsKeyword(name) {
		log.Fatalf("invalid identifier %q", name)
	}
	for i, ch := range name {
		// Digits are only permitted after the first character.
		if unicode.IsLetter(ch) || ch == '_' || i > 0 && unicode.IsDigit(ch) {
			continue
		}
		log.Fatalf("invalid identifier %q", name)
	}
}
// isExported reports whether the name is an exported identifier. // isExported reports whether the name is an exported identifier.
// If the unexported flag (-u) is true, isExported returns true because // If the unexported flag (-u) is true, isExported returns true because
// it means that we treat the name as if it is exported. // it means that we treat the name as if it is exported.

View File

@ -10,8 +10,6 @@ package ast
import ( import (
"go/token" "go/token"
"strings" "strings"
"unicode"
"unicode/utf8"
) )
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
@ -523,18 +521,13 @@ func (*ChanType) exprNode() {}
// //
func NewIdent(name string) *Ident { return &Ident{token.NoPos, name, nil} } func NewIdent(name string) *Ident { return &Ident{token.NoPos, name, nil} }
// IsExported reports whether name is an exported Go symbol // IsExported reports whether name starts with an upper-case letter.
// (that is, whether it begins with an upper-case letter).
// //
func IsExported(name string) bool { func IsExported(name string) bool { return token.IsExported(name) }
ch, _ := utf8.DecodeRuneInString(name)
return unicode.IsUpper(ch)
}
// IsExported reports whether id is an exported Go symbol // IsExported reports whether id starts with an upper-case letter.
// (that is, whether it begins with an uppercase letter).
// //
func (id *Ident) IsExported() bool { return IsExported(id.Name) } func (id *Ident) IsExported() bool { return token.IsExported(id.Name) }
func (id *Ident) String() string { func (id *Ident) String() string {
if id != nil { if id != nil {

View File

@ -7,7 +7,11 @@
// //
package token package token
import "strconv" import (
"strconv"
"unicode"
"unicode/utf8"
)
// Token is the set of lexical tokens of the Go programming language. // Token is the set of lexical tokens of the Go programming language.
type Token int type Token int
@ -306,3 +310,31 @@ func (tok Token) IsOperator() bool { return operator_beg < tok && tok < operator
// it returns false otherwise. // it returns false otherwise.
// //
func (tok Token) IsKeyword() bool { return keyword_beg < tok && tok < keyword_end } func (tok Token) IsKeyword() bool { return keyword_beg < tok && tok < keyword_end }
// IsExported reports whether the first rune of name is an upper-case
// letter. It does not check that name is a valid identifier; the empty
// string is reported as not exported.
//
func IsExported(name string) bool {
	first, width := utf8.DecodeRuneInString(name)
	return width > 0 && unicode.IsUpper(first)
}
// IsKeyword reports whether name is one of the Go keywords, for example
// "func" or "return". The check is a lookup in the package-level
// keywords map.
//
func IsKeyword(name string) bool {
	// TODO: opt: use a perfect hash function instead of a global map.
	_, known := keywords[name]
	return known
}
// IsIdentifier reports whether name is a Go identifier: a non-empty string
// consisting of letters, digits, and underscores whose first character is
// not a digit. Keywords are not identifiers.
//
func IsIdentifier(name string) bool {
	if name == "" || IsKeyword(name) {
		return false
	}
	for pos, r := range name {
		switch {
		case r == '_' || unicode.IsLetter(r):
			// Letters and underscores may appear anywhere.
		case pos > 0 && unicode.IsDigit(r):
			// Digits may appear anywhere but the first position.
		default:
			return false
		}
	}
	return true
}

View File

@ -0,0 +1,33 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package token
import "testing"
// TestIsIdentifier runs IsIdentifier over a table of edge cases (empty
// input, whitespace, leading digits, keywords, and non-ASCII letters),
// one subtest per case.
func TestIsIdentifier(t *testing.T) {
	type testCase struct {
		name string
		in   string
		want bool
	}
	cases := []testCase{
		{name: "Empty", in: "", want: false},
		{name: "Space", in: " ", want: false},
		{name: "SpaceSuffix", in: "foo ", want: false},
		{name: "Number", in: "123", want: false},
		{name: "Keyword", in: "func", want: false},

		{name: "LettersASCII", in: "foo", want: true},
		{name: "MixedASCII", in: "_bar123", want: true},
		{name: "UppercaseKeyword", in: "Func", want: true},
		{name: "LettersUnicode", in: "fóö", want: true},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := IsIdentifier(tc.in)
			if got == tc.want {
				return
			}
			t.Fatalf("IsIdentifier(%q) = %t, want %v", tc.in, got, tc.want)
		})
	}
}