1
0
mirror of https://github.com/golang/go synced 2024-11-25 10:57:58 -07:00

improved sentence extraction:

- don't forget first periods
- look at capitalization of last char before periods

R=rsc
http://go/go-review/1024027
This commit is contained in:
Robert Griesemer 2009-11-08 21:48:51 -08:00
parent ed6eb5b577
commit c532940ecf

View File

@@ -23,6 +23,7 @@ import (
"sync"; "sync";
"template"; "template";
"time"; "time";
"unicode";
"utf8"; "utf8";
) )
@@ -137,21 +138,38 @@ func htmlEscape(s string) string {
func firstSentence(s string) string { func firstSentence(s string) string {
// find first period followed by whitespace, or just the first period i := -1; // index+1 of first period
i := -1; j := -1; // index+1 of first period that is followed by white space
for j, ch := range s { prev := 'A';
for k, ch := range s {
k1 := k+1;
if ch == '.' { if ch == '.' {
i = j+1; // include period if i < 0 {
if i < len(s) && s[i] <= ' ' { i = k1; // first period
}
if k1 < len(s) && s[k1] <= ' ' {
if j < 0 {
j = k1; // first period followed by white space
}
if !unicode.IsUpper(prev) {
j = k1;
break; break;
} }
} }
} }
if i < 0 { prev = ch;
// no period found, use the enire string
i = len(s);
} }
return s[0:i];
if j < 0 {
// use the next best period
j = i;
if j < 0 {
// no period at all, use the entire string
j = len(s);
}
}
return s[0:j];
} }