在不使用正则表达式的情况下替换整个单词

我正在使用 strings.Replace 替换子字符串:


// removeIP returns text with every occurrence of the substring "someWord"
// replaced by "**NewWord**". Matching is purely substring based, so
// occurrences embedded inside longer words are replaced as well.
func removeIP(text string) string {
	return strings.ReplaceAll(text, "someWord", "**NewWord**")
}


// someWordRe matches "someWord" only when it is delimited by word boundaries.
// It is compiled once at package scope so that repeated calls do not pay the
// compilation cost again.
var someWordRe = regexp.MustCompile(`\bsomeWord\b`)

// removeIPUsingRegex returns text with every whole-word occurrence of
// "someWord" replaced by "**NewWord**". Occurrences that are part of a longer
// word (for example "someWordX") are left untouched.
func removeIPUsingRegex(text string) string {
	return someWordRe.ReplaceAllString(text, "**NewWord**")
}

我在这里面临的问题是:我只想替换整个单词,而 strings.Replace 不支持这一点。

因为我必须替换非常非常大的字符串,所以可能以 GB 为单位。与字符串替换相比,正则表达式非常慢。

eg: text: "abcdef defgh /def/ .def/ =def= def xxxy" -> Replace def with DEF

output: "abcdef defgh /DEF/ .DEF/ =DEF= DEF xxxy"//注意只有整个单词被替换了。


正则表达式的耗时多了近 100 倍(https://medium.com/codezillas/golang-replace-vs-regexp-de4e48482f53)。任何想法将不胜感激。


慕慕森
浏览 82回答 1
1回答

鸿蒙传说

// Uses the KMP (Knuth-Morris-Pratt) algorithm.

// ReplaceWholeWord returns a copy of text in which every whole-word
// occurrence of oldWord is replaced by newWord.
//
// An occurrence counts as a whole word when the byte immediately before it
// and the byte immediately after it are each either absent (start/end of
// text) or a non-word byte as defined by isNonWord. Matching is byte
// oriented and replaced occurrences never overlap.
//
// If oldWord is empty there is nothing to search for and text is returned
// unchanged.
func ReplaceWholeWord(text string, oldWord string, newWord string) string {
	patternLength := len(oldWord)
	if patternLength == 0 {
		return text
	}
	textLength := len(text)
	lps := computeLPSArray(oldWord)

	var newString strings.Builder
	copyIndex := 0    // start of the part of text not yet written to newString
	textIndex := 0    // next byte of text to examine
	patternIndex := 0 // number of bytes of oldWord currently matched

	for textIndex < textLength {
		switch {
		case oldWord[patternIndex] == text[textIndex]:
			patternIndex++
			textIndex++
			if patternIndex < patternLength {
				continue
			}
			// Full occurrence found at text[startIndex:textIndex].
			startIndex := textIndex - patternLength
			if checkIfWholeWord(text, startIndex, textIndex-1) {
				newString.WriteString(text[copyIndex:startIndex])
				newString.WriteString(newWord)
				copyIndex = textIndex
				// Replaced occurrences must not overlap, so restart matching.
				patternIndex = 0
			} else {
				// Rejected occurrence: keep the KMP state so that an
				// overlapping occurrence that is a whole word is still found.
				patternIndex = lps[patternLength-1]
			}
		case patternIndex != 0:
			// Mismatch after a partial match: fall back without moving
			// textIndex backwards.
			patternIndex = lps[patternIndex-1]
		default:
			textIndex++
		}
	}
	newString.WriteString(text[copyIndex:])
	return newString.String()
}

// computeLPSArray builds the KMP failure table for pattern: lps[i] is the
// length of the longest proper prefix of pattern[:i+1] that is also a suffix
// of it. An empty pattern yields an empty table.
func computeLPSArray(pattern string) []int {
	patternLength := len(pattern)
	lps := make([]int, patternLength) // zero-initialised, so lps[0] == 0
	length := 0                       // length of the current matched prefix
	for i := 1; i < patternLength; {
		switch {
		case pattern[i] == pattern[length]:
			length++
			lps[i] = length
			i++
		case length != 0:
			// Retry with the next shorter border; i is not advanced.
			length = lps[length-1]
		default:
			lps[i] = 0
			i++
		}
	}
	return lps
}

// checkIfWholeWord reports whether text[startIndex:endIndex+1] is delimited
// on both sides by either the edge of text or a non-word byte. Only the two
// neighbouring bytes are inspected; the occurrence itself is not examined.
func checkIfWholeWord(text string, startIndex int, endIndex int) bool {
	before := startIndex - 1
	after := endIndex + 1
	if before >= 0 && !isNonWord(text[before]) {
		return false
	}
	if after < len(text) && !isNonWord(text[after]) {
		return false
	}
	return true
}

// isNonWord reports whether c is not an ASCII letter, digit or underscore.
// Every byte >= 0x80 (i.e. any byte of a multi-byte UTF-8 sequence) is
// therefore treated as a non-word byte.
func isNonWord(c byte) bool {
	return !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_'))
}
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Go