如何按自定义的单词边界匹配每个单词的首字母?

我有一组测试用例,输入是类似下面这样的词组:


    {

        input:    "Halley's Comet",

        expected: "HC",

    },

    {

        input:    "First In, First Out",

        expected: "FIFO",

    },

    {

        input:    "The Road _Not_ Taken",

        expected: "TRNT",

    },

我希望用一个正则表达式匹配这些单词的首字母:不要把字符 "_" 当作首字母,并且把单引号算作单词的一部分。

目前我有一个正则表达式,它在 PCRE 语法下可用,但在 Go 的 regexp 包中无法使用:(?<![a-zA-Z0-9'])([a-zA-Z0-9'])

我知道 Go 不支持环视,但我正在寻找一种好方法来做到这一点。


另外,我用这个函数获取所有匹配字符串组成的切片:re.FindAllString(s, -1)


谢谢你的帮助。


PIPIONE
浏览 121回答 2
2回答

暮色呼如

// Character classes combined with word boundaries are enough for this:
//
//	\b_*([a-z])[a-z']*_*\b\W*
//
// Demo usage:
package main

import (
	"fmt"
	"regexp"
)

// initialRe matches one whole word plus the non-word characters that follow
// it, capturing only the word's first letter:
//
//   - \b_*      start at a word boundary and skip leading underscores
//   - ([a-z])   capture the first letter (case-insensitive via (?i))
//   - [a-z']*   the rest of the word; apostrophes are part of the word, so
//     "O'Brian's" is one word rather than "O" followed by "Brian's"
//   - _*\b\W*   trailing underscores, a word boundary, then any separators
//
// Go's regexp (RE2) has no lookbehind, so instead of asserting what precedes
// the first letter, the whole word is consumed and replaced by its capture.
var initialRe = regexp.MustCompile(`(?i)\b_*([a-z])[a-z']*_*\b\W*`)

// initials returns s with every matched word (and its trailing separators)
// replaced by that word's first letter. Text the pattern does not match is
// left in place.
func initials(s string) string {
	return initialRe.ReplaceAllString(s, "$1")
}

func main() {
	fmt.Println(initials("O'Brian's dog")) // Od
}

慕尼黑5688855

// For the record, a solution that does not use regular expressions: scan the
// runes once and track whether the scanner is currently inside a word.
package main

import (
	"fmt"
	"unicode"
)

func main() {
	inputs := []string{"Hallمرحباey's Comet", "First In, First Out", "The Road _Not_ Taken", "O'Brian's Dog"}
	c := make([][]string, 0, len(inputs))
	w := make([][]string, 0, len(inputs))
	for _, input := range inputs {
		c = append(c, firstLet(input))
		w = append(w, words(input))
	}
	fmt.Printf("%#v\n", w)
	fmt.Printf("%#v\n", c)
}

// firstLet returns the first letter of every word in in, one string per word.
//
// A word starts at the first rune accepted by isChar (so leading underscores,
// quotes and other punctuation are skipped) and ends at the next whitespace
// rune. Apostrophes and other non-space runes inside a word do not start a
// new word. The result is nil when in contains no letters.
func firstLet(in string) (out []string) {
	var inword bool
	for _, r := range in {
		switch {
		case inword:
			// Any whitespace (space, tab, newline, ...) ends the word.
			if unicode.IsSpace(r) {
				inword = false
			}
		case isChar(r):
			inword = true
			out = append(out, string(r))
		}
	}
	return out
}

// words splits in into whitespace-separated words.
//
// Each word begins at its first letter (runes before it are dropped), has
// every underscore removed, and keeps all other non-space runes, including
// trailing punctuation such as the comma in "In,". The result is nil when in
// contains no letters.
func words(in string) (out []string) {
	var inword bool
	var w []rune
	for _, r := range in {
		switch {
		case !inword:
			if isChar(r) {
				w = append(w, r)
				inword = true
			}
		case unicode.IsSpace(r):
			if len(w) > 0 {
				out = append(out, string(w))
				// string(w) copied the runes, so the buffer can be reused.
				w = w[:0]
			}
			inword = false
		case r != '_':
			w = append(w, r)
		}
	}
	// Flush the final word when the input does not end in whitespace.
	if len(w) > 0 {
		out = append(out, string(w))
	}
	return out
}

// isChar reports whether r can start a word, i.e. whether it is a letter in
// any script (not only ASCII a-z / A-Z).
func isChar(r rune) bool {
	return unicode.IsLetter(r)
}

// Output:
//
//	[][]string{[]string{"Hallمرحباey's", "Comet"}, []string{"First", "In,", "First", "Out"}, []string{"The", "Road", "Not", "Taken"}, []string{"O'Brian's", "Dog"}}
//	[][]string{[]string{"H", "C"}, []string{"F", "I", "F", "O"}, []string{"T", "R", "N", "T"}, []string{"O", "D"}}
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Go