mirror of
https://github.com/golang/go
synced 2024-11-17 18:54:42 -07:00
go/token: add IsIdentifier, IsKeyword, and IsExported
Telling whether a string is a valid Go identifier can seem like an easy task, but it's easy to forget about the edge cases. For example, some implementations out there forget that an empty string or keywords like "func" aren't valid identifiers. Add a simple implementation with proper Unicode support, and start using it in cmd/cover and cmd/doc. Other pieces of the standard library reimplement part of this logic, but don't use a "func(string) bool" signature, so we're leaving them untouched for now. Add some tests too, to ensure that we actually got these edge cases correctly. Since telling whether a string is a valid identifier requires knowing that it's not a valid keyword, add IsKeyword too. The internal map was already accessible via Lookup, but "Lookup(str) != IDENT" isn't as easy to understand as IsKeyword(str). And, as per Josh's suggestion, we could have IsKeyword (and probably Lookup too) use a perfect hash function instead of a global map. Finally, for consistency with these new functions, add IsExported. That makes go/ast.IsExported a bit redundant, so perhaps it can be deprecated in favor of go/token.IsExported in the future. Clarify that token.IsExported doesn't imply token.IsIdentifier, to avoid ambiguity. Fixes #30064. Change-Id: I0e0e49215fd7e47b603ebc2b5a44086c51ba57f7 Reviewed-on: https://go-review.googlesource.com/c/go/+/169018 Run-TryBot: Daniel Martí <mvdan@mvdan.cc> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org> Reviewed-by: Alan Donovan <adonovan@google.com>
This commit is contained in:
parent
a01d108e30
commit
60a8dbf3b4
@ -16,7 +16,6 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"sort"
|
"sort"
|
||||||
"unicode"
|
|
||||||
|
|
||||||
"cmd/internal/edit"
|
"cmd/internal/edit"
|
||||||
"cmd/internal/objabi"
|
"cmd/internal/objabi"
|
||||||
@ -117,7 +116,7 @@ func parseFlags() error {
|
|||||||
return fmt.Errorf("too many options")
|
return fmt.Errorf("too many options")
|
||||||
}
|
}
|
||||||
|
|
||||||
if *varVar != "" && !isValidIdentifier(*varVar) {
|
if *varVar != "" && !token.IsIdentifier(*varVar) {
|
||||||
return fmt.Errorf("-var: %q is not a valid identifier", *varVar)
|
return fmt.Errorf("-var: %q is not a valid identifier", *varVar)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -685,22 +684,6 @@ func (f *File) addVariables(w io.Writer) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func isValidIdentifier(ident string) bool {
|
|
||||||
if len(ident) == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
for i, c := range ident {
|
|
||||||
if i > 0 && unicode.IsDigit(c) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if c == '_' || unicode.IsLetter(c) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// It is possible for positions to repeat when there is a line
|
// It is possible for positions to repeat when there is a line
|
||||||
// directive that does not specify column information and the input
|
// directive that does not specify column information and the input
|
||||||
// has not been passed through gofmt.
|
// has not been passed through gofmt.
|
||||||
|
@ -42,6 +42,7 @@ import (
|
|||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"go/build"
|
"go/build"
|
||||||
|
"go/token"
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
@ -333,30 +334,20 @@ func parseSymbol(str string) (symbol, method string) {
|
|||||||
case 1:
|
case 1:
|
||||||
case 2:
|
case 2:
|
||||||
method = elem[1]
|
method = elem[1]
|
||||||
isIdentifier(method)
|
if !token.IsIdentifier(method) {
|
||||||
|
log.Fatalf("invalid identifier %q", method)
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
log.Printf("too many periods in symbol specification")
|
log.Printf("too many periods in symbol specification")
|
||||||
usage()
|
usage()
|
||||||
}
|
}
|
||||||
symbol = elem[0]
|
symbol = elem[0]
|
||||||
isIdentifier(symbol)
|
if !token.IsIdentifier(symbol) {
|
||||||
|
log.Fatalf("invalid identifier %q", symbol)
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// isIdentifier checks that the name is valid Go identifier, and
|
|
||||||
// logs and exits if it is not.
|
|
||||||
func isIdentifier(name string) {
|
|
||||||
if len(name) == 0 {
|
|
||||||
log.Fatal("empty symbol")
|
|
||||||
}
|
|
||||||
for i, ch := range name {
|
|
||||||
if unicode.IsLetter(ch) || ch == '_' || i > 0 && unicode.IsDigit(ch) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
log.Fatalf("invalid identifier %q", name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// isExported reports whether the name is an exported identifier.
|
// isExported reports whether the name is an exported identifier.
|
||||||
// If the unexported flag (-u) is true, isExported returns true because
|
// If the unexported flag (-u) is true, isExported returns true because
|
||||||
// it means that we treat the name as if it is exported.
|
// it means that we treat the name as if it is exported.
|
||||||
|
@ -10,8 +10,6 @@ package ast
|
|||||||
import (
|
import (
|
||||||
"go/token"
|
"go/token"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode"
|
|
||||||
"unicode/utf8"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
@ -523,18 +521,13 @@ func (*ChanType) exprNode() {}
|
|||||||
//
|
//
|
||||||
func NewIdent(name string) *Ident { return &Ident{token.NoPos, name, nil} }
|
func NewIdent(name string) *Ident { return &Ident{token.NoPos, name, nil} }
|
||||||
|
|
||||||
// IsExported reports whether name is an exported Go symbol
|
// IsExported reports whether name starts with an upper-case letter.
|
||||||
// (that is, whether it begins with an upper-case letter).
|
|
||||||
//
|
//
|
||||||
func IsExported(name string) bool {
|
func IsExported(name string) bool { return token.IsExported(name) }
|
||||||
ch, _ := utf8.DecodeRuneInString(name)
|
|
||||||
return unicode.IsUpper(ch)
|
|
||||||
}
|
|
||||||
|
|
||||||
// IsExported reports whether id is an exported Go symbol
|
// IsExported reports whether id starts with an upper-case letter.
|
||||||
// (that is, whether it begins with an uppercase letter).
|
|
||||||
//
|
//
|
||||||
func (id *Ident) IsExported() bool { return IsExported(id.Name) }
|
func (id *Ident) IsExported() bool { return token.IsExported(id.Name) }
|
||||||
|
|
||||||
func (id *Ident) String() string {
|
func (id *Ident) String() string {
|
||||||
if id != nil {
|
if id != nil {
|
||||||
|
@ -7,7 +7,11 @@
|
|||||||
//
|
//
|
||||||
package token
|
package token
|
||||||
|
|
||||||
import "strconv"
|
import (
|
||||||
|
"strconv"
|
||||||
|
"unicode"
|
||||||
|
"unicode/utf8"
|
||||||
|
)
|
||||||
|
|
||||||
// Token is the set of lexical tokens of the Go programming language.
|
// Token is the set of lexical tokens of the Go programming language.
|
||||||
type Token int
|
type Token int
|
||||||
@ -306,3 +310,31 @@ func (tok Token) IsOperator() bool { return operator_beg < tok && tok < operator
|
|||||||
// it returns false otherwise.
|
// it returns false otherwise.
|
||||||
//
|
//
|
||||||
func (tok Token) IsKeyword() bool { return keyword_beg < tok && tok < keyword_end }
|
func (tok Token) IsKeyword() bool { return keyword_beg < tok && tok < keyword_end }
|
||||||
|
|
||||||
|
// IsExported reports whether name starts with an upper-case letter.
|
||||||
|
//
|
||||||
|
func IsExported(name string) bool {
|
||||||
|
ch, _ := utf8.DecodeRuneInString(name)
|
||||||
|
return unicode.IsUpper(ch)
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsKeyword reports whether name is a Go keyword, such as "func" or "return".
|
||||||
|
//
|
||||||
|
func IsKeyword(name string) bool {
|
||||||
|
// TODO: opt: use a perfect hash function instead of a global map.
|
||||||
|
_, ok := keywords[name]
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsIdentifier reports whether name is a Go identifier, that is, a non-empty
|
||||||
|
// string made up of letters, digits, and underscores, where the first character
|
||||||
|
// is not a digit. Keywords are not identifiers.
|
||||||
|
//
|
||||||
|
func IsIdentifier(name string) bool {
|
||||||
|
for i, c := range name {
|
||||||
|
if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return name != "" && !IsKeyword(name)
|
||||||
|
}
|
||||||
|
33
src/go/token/token_test.go
Normal file
33
src/go/token/token_test.go
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
// Copyright 2019 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package token
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestIsIdentifier(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
in string
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{"Empty", "", false},
|
||||||
|
{"Space", " ", false},
|
||||||
|
{"SpaceSuffix", "foo ", false},
|
||||||
|
{"Number", "123", false},
|
||||||
|
{"Keyword", "func", false},
|
||||||
|
|
||||||
|
{"LettersASCII", "foo", true},
|
||||||
|
{"MixedASCII", "_bar123", true},
|
||||||
|
{"UppercaseKeyword", "Func", true},
|
||||||
|
{"LettersUnicode", "fóö", true},
|
||||||
|
}
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
if got := IsIdentifier(test.in); got != test.want {
|
||||||
|
t.Fatalf("IsIdentifier(%q) = %t, want %v", test.in, got, test.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user