隔江千里
不是关于最快的答案,而是关于使用Go语言来优化一段代码的方法的逐步回答。有关什么是最快的非常正式的见解,请参阅 https://stackoverflow.com/a/6072100/4466350您的代码有问题。始终编写测试。一、让我们用写一个主package mainimport ( "math/rand" "sort")func main() {}func randSlice(max int) (ret []uint32) { // we should check that max does not exceed maxUINT32 ret = make([]uint32, 0, max) r := rand.New(rand.NewSource(99)) for i := 0; i < max; i++ { ret = append(ret, uint32(r.Intn(max))) } sort.Slice(ret, func(i, j int) bool { return ret[i] < ret[j] }) return}func dedup1(s []uint32) []uint32 { if len(s) < 2 { return s } tmp := make([]uint32, 0, len(s)) for i := uint32(0); i < uint32(len(s)); i++ { // If current is not equal to next then store the current if s[i] != s[i+1] { tmp = append(tmp, s[i]) } } // The last must be stored // Note that if it was repeated, the duplicates are NOT stored before tmp = append(tmp, s[len(s)-1]) // Modify original slice s = nil s = append(s, tmp...) return s}以及随附的测试package mainimport "testing"func TestDedup1(t *testing.T) { s := randSlice(10) res := dedup1(s) uniq := map[uint32]bool{} for _, r := range res { _, ok := uniq[r] if ok { t.Fatalf("found duplicates\ninput=%#v\nresult=%#v\n", s, res) } uniq[r] = true }}运行这个我们得到$ go test -v . === RUN TestDedup1--- FAIL: TestDedup1 (0.00s)panic: runtime error: index out of range [10] with length 10 [recovered] panic: runtime error: index out of range [10] with length 10goroutine 18 [running]:testing.tRunner.func1.1(0x536680, 0xc0000da040) /home/mh-cbon/.gvm/gos/go1.15.2/src/testing/testing.go:1076 +0x30dtesting.tRunner.func1(0xc000082600) /home/mh-cbon/.gvm/gos/go1.15.2/src/testing/testing.go:1079 +0x41apanic(0x536680, 0xc0000da040) /home/mh-cbon/.gvm/gos/go1.15.2/src/runtime/panic.go:969 +0x175test/d/dup.dedup1(0xc000094060, 0xa, 0xa, 0xa, 0x6124a0, 0xc00003c770) /home/mh-cbon/gow/src/test/d/dup/main.go:32 +0x248test/d/dup.TestDedup1(0xc000082600) /home/mh-cbon/gow/src/test/d/dup/main_test.go:7 +0x70testing.tRunner(0xc000082600, 0x54fbf0) /home/mh-cbon/.gvm/gos/go1.15.2/src/testing/testing.go:1127 +0xefcreated by testing.(*T).Run /home/mh-cbon/.gvm/gos/go1.15.2/src/testing/testing.go:1178 +0x386FAIL test/d/dup 0.006sFAIL我们通过适当地检查切片边界来解决此问题。在 中,将此条件更改为 ,或者更好的是,将迭代最大值减少 1dedup1if s[i] != s[i+1] {if i+1 < uint32(len(s)) && s[i] != s[i+1] {for i := uint32(0); i < uint32(len(s))-1; i++ {接下来,编写一个函数来生成具有随机重复项的切片。func randSliceWithDups(max int) (ret []uint32) { ret = randSlice(max / 2) ret = append(ret, ret...) sort.Slice(ret, func(i, j int) bool { return ret[i] < ret[j] }) return}写入获取切片,例如randSliceWithDups(50)[0 0 1 1 2 2 3 3 3 3 4 4 5 5 5 5 6 6 6 6 7 7 8 8 9 9 9 9 12 12 13 13 18 18 18 18 18 18 19 19 20 20 20 20 21 21 22 22 24 24]再次更新测试func TestDedup1_with_dups(t *testing.T) { s := randSliceWithDups(10) res := dedup1(s) uniq := map[uint32]bool{} for _, r := range res { _, ok := uniq[r] if ok { t.Fatalf("found duplicates\ninput=%#v\nresult=%#v\n", s, res) } uniq[r] = true }}接下来,添加一个基准测试;它将帮助我们发现性能问题并随着时间的推移保持性能。func BenchmarkDedup1_1000(b *testing.B) { s := randSliceWithDups(100) b.ResetTimer() b.ReportAllocs() for i := 0; i < b.N; i++ { _ = dedup1(s) }}运行这个我们得到:$ go test -v . -bench=.=== RUN TestDedup1--- PASS: TestDedup1 (0.00s)=== RUN TestDedup1_with_dups--- PASS: TestDedup1_with_dups (0.00s)goos: linuxgoarch: amd64pkg: test/d/dupBenchmarkDedup1_1000BenchmarkDedup1_1000-4 172087 6353 ns/op 5504 B/op 2 allocs/opPASSok test/d/dup 1.174s让我们陈述一下,每个人都发现阅读你的初始代码,甚至没有编写你正在分配的基准测试。这就提出了一个问题,即确定是否允许您就地修改输入切片。如果您可以将其更改到位,我们可能会利用这一点来防止该分配并加快您的程序。一种从头开始编写的解决方案(考虑在网站等极客网站上搜索普遍接受的解决方案),是迭代切片并维护下一个要写入的位置的索引。找到非重复项时,将非重复项保存到此最后一个位置,然后将该索引递增 1。最后,将切片向上返回为递增索引的值。func dedup2(s []uint32) []uint32 { if len(s) < 2 { return s } var e int for i := 1; i < len(s); i++ { if s[i] == s[i-1] { continue } s[e] = s[i] e++ } return s[:e]}同样,添加测试和工作台,并检查结果。func TestDedup2(t *testing.T) { s := randSlice(10) res := dedup2(s) uniq := map[uint32]bool{} for _, r := range res { _, ok := uniq[r] if ok { t.Fatalf("found duplicates\ninput=%#v\nresult=%#v\n", s, res) } uniq[r] = true }}func TestDedup2_with_dups(t *testing.T) { s := randSliceWithDups(10) res := dedup2(s) uniq := map[uint32]bool{} for _, r := range res { _, ok := uniq[r] if ok { t.Fatalf("found duplicates\ninput=%#v\nresult=%#v\n", s, res) } uniq[r] = true }}func BenchmarkDedup2_1000(b *testing.B) { s := randSliceWithDups(100) b.ResetTimer() b.ReportAllocs() for i := 0; i < b.N; i++ { _ = dedup2(s) }}其产生:$ go test -v . -bench=.=== RUN TestDedup1--- PASS: TestDedup1 (0.00s)=== RUN TestDedup1_with_dups--- PASS: TestDedup1_with_dups (0.00s)=== RUN TestDedup2--- PASS: TestDedup2 (0.00s)=== RUN TestDedup2_with_dups--- PASS: TestDedup2_with_dups (0.00s)goos: linuxgoarch: amd64pkg: test/d/dupBenchmarkDedup1_1000BenchmarkDedup1_1000-4 1764574 673 ns/op 544 B/op 2 allocs/opBenchmarkDedup2_1000BenchmarkDedup2_1000-4 7758907 152 ns/op 0 B/op 0 allocs/opPASSok test/d/dup 3.224s4倍的改进!凉!下一步是什么?接下来是找到一个更好的算法,它产生更少的执行,更少的查找等等。不过,最后一个版本包含一个错误!你发现它了吗?请参阅此测试,它比另一个测试更好,因为它不依赖于随机数,而是依赖于具有强相等性检查的静态值。使用这些类型的测试,您可以定制输入以检查细粒度情况。func TestDedup2_static(t *testing.T) { type expectation struct { input []uint32 want []uint32 } expectations := []expectation{ expectation{ input: []uint32{0, 0, 1, 2, 3, 3, 3, 4, 4, 5}, want: []uint32{0, 1, 2, 3, 4, 5}, }, expectation{ input: []uint32{0, 1, 2, 3, 3, 3, 4, 4, 5}, want: []uint32{0, 1, 2, 3, 4, 5}, }, } for _, e := range expectations { res := dedup2(e.input) if !reflect.DeepEqual(res, e.want) { t.Fatalf("invlaid result, wanted=%#v\ngot=%#v\n", e.want, res) } }}它使用表驱动器测试,如 https://dave.cheney.net/2013/06/09/writing-table-driven-tests-in-go 中所述让我们解决这个问题:func dedup2(s []uint32) []uint32 { if len(s) < 2 { return s } var e int = 1 for i := 1; i < len(s); i++ { if s[i] == s[i-1] { continue } s[e] = s[i] e++ } return s[:e]}