删除已排序 Go 切片重复项的最快方法

不是关于最快的答案，而是关于使用Go语言来优化一段代码的方法的逐步回答。有关什么是最快的非常正式的见解，请参阅 https://stackoverflow.com/a/6072100/4466350您的代码有问题。始终编写测试。一、让我们用写一个主package mainimport (    "math/rand"    "sort")func main() {}func randSlice(max int) (ret []uint32) {    // we should check that max does not exceed maxUINT32    ret = make([]uint32, 0, max)    r := rand.New(rand.NewSource(99))    for i := 0; i < max; i++ {        ret = append(ret, uint32(r.Intn(max)))    }    sort.Slice(ret, func(i, j int) bool {        return ret[i] < ret[j]    })    return}func dedup1(s []uint32) []uint32 {    if len(s) < 2 {        return s    }    tmp := make([]uint32, 0, len(s))    for i := uint32(0); i < uint32(len(s)); i++ {        // If current is not equal to next then store the current        if s[i] != s[i+1] {            tmp = append(tmp, s[i])        }    }    // The last must be stored    // Note that if it was repeated, the duplicates are NOT stored before    tmp = append(tmp, s[len(s)-1])    // Modify original slice    s = nil    s = append(s, tmp...)    return s}以及随附的测试package mainimport "testing"func TestDedup1(t *testing.T) {    s := randSlice(10)    res := dedup1(s)    uniq := map[uint32]bool{}    for _, r := range res {        _, ok := uniq[r]        if ok {            t.Fatalf("found duplicates\ninput=%#v\nresult=%#v\n", s, res)        }        uniq[r] = true    }}运行这个我们得到$ go test -v . === RUN   TestDedup1--- FAIL: TestDedup1 (0.00s)panic: runtime error: index out of range [10] with length 10 [recovered]    panic: runtime error: index out of range [10] with length 10goroutine 18 [running]:testing.tRunner.func1.1(0x536680, 0xc0000da040)    /home/mh-cbon/.gvm/gos/go1.15.2/src/testing/testing.go:1076 +0x30dtesting.tRunner.func1(0xc000082600)    /home/mh-cbon/.gvm/gos/go1.15.2/src/testing/testing.go:1079 +0x41apanic(0x536680, 0xc0000da040)    /home/mh-cbon/.gvm/gos/go1.15.2/src/runtime/panic.go:969 +0x175test/d/dup.dedup1(0xc000094060, 0xa, 0xa, 0xa, 0x6124a0, 0xc00003c770)    /home/mh-cbon/gow/src/test/d/dup/main.go:32 +0x248test/d/dup.TestDedup1(0xc000082600)    /home/mh-cbon/gow/src/test/d/dup/main_test.go:7 +0x70testing.tRunner(0xc000082600, 0x54fbf0)    /home/mh-cbon/.gvm/gos/go1.15.2/src/testing/testing.go:1127 +0xefcreated by testing.(*T).Run    /home/mh-cbon/.gvm/gos/go1.15.2/src/testing/testing.go:1178 +0x386FAIL    test/d/dup  0.006sFAIL我们通过适当地检查切片边界来解决此问题。在中，将此条件更改为，或者更好的是，将迭代最大值减少 1dedup1if s[i] != s[i+1] {if i+1 < uint32(len(s)) && s[i] != s[i+1] {for i := uint32(0); i < uint32(len(s))-1; i++ {接下来，编写一个函数来生成具有随机重复项的切片。func randSliceWithDups(max int) (ret []uint32) {    ret = randSlice(max / 2)    ret = append(ret, ret...)    sort.Slice(ret, func(i, j int) bool {        return ret[i] < ret[j]    })    return}写入获取切片，例如randSliceWithDups(50)[0 0 1 1 2 2 3 3 3 3 4 4 5 5 5 5 6 6 6 6 7 7 8 8 9 9 9 9 12 12 13 13 18 18 18 18 18 18 19 19 20 20 20 20 21 21 22 22 24 24]再次更新测试func TestDedup1_with_dups(t *testing.T) {    s := randSliceWithDups(10)    res := dedup1(s)    uniq := map[uint32]bool{}    for _, r := range res {        _, ok := uniq[r]        if ok {            t.Fatalf("found duplicates\ninput=%#v\nresult=%#v\n", s, res)        }        uniq[r] = true    }}接下来，添加一个基准测试;它将帮助我们发现性能问题并随着时间的推移保持性能。func BenchmarkDedup1_1000(b *testing.B) {    s := randSliceWithDups(100)    b.ResetTimer()    b.ReportAllocs()    for i := 0; i < b.N; i++ {        _ = dedup1(s)    }}运行这个我们得到：$ go test -v . -bench=.=== RUN   TestDedup1--- PASS: TestDedup1 (0.00s)=== RUN   TestDedup1_with_dups--- PASS: TestDedup1_with_dups (0.00s)goos: linuxgoarch: amd64pkg: test/d/dupBenchmarkDedup1_1000BenchmarkDedup1_1000-4        172087          6353 ns/op        5504 B/op          2 allocs/opPASSok      test/d/dup  1.174s让我们陈述一下，每个人都发现阅读你的初始代码，甚至没有编写你正在分配的基准测试。这就提出了一个问题，即确定是否允许您就地修改输入切片。如果您可以将其更改到位，我们可能会利用这一点来防止该分配并加快您的程序。一种从头开始编写的解决方案（考虑在网站等极客网站上搜索普遍接受的解决方案），是迭代切片并维护下一个要写入的位置的索引。找到非重复项时，将非重复项保存到此最后一个位置，然后将该索引递增 1。最后，将切片向上返回为递增索引的值。func dedup2(s []uint32) []uint32 {    if len(s) < 2 {        return s    }    var e int    for i := 1; i < len(s); i++ {        if s[i] == s[i-1] {            continue        }        s[e] = s[i]        e++    }    return s[:e]}同样，添加测试和工作台，并检查结果。func TestDedup2(t *testing.T) {    s := randSlice(10)    res := dedup2(s)    uniq := map[uint32]bool{}    for _, r := range res {        _, ok := uniq[r]        if ok {            t.Fatalf("found duplicates\ninput=%#v\nresult=%#v\n", s, res)        }        uniq[r] = true    }}func TestDedup2_with_dups(t *testing.T) {    s := randSliceWithDups(10)    res := dedup2(s)    uniq := map[uint32]bool{}    for _, r := range res {        _, ok := uniq[r]        if ok {            t.Fatalf("found duplicates\ninput=%#v\nresult=%#v\n", s, res)        }        uniq[r] = true    }}func BenchmarkDedup2_1000(b *testing.B) {    s := randSliceWithDups(100)    b.ResetTimer()    b.ReportAllocs()    for i := 0; i < b.N; i++ {        _ = dedup2(s)    }}其产生：$ go test -v . -bench=.=== RUN   TestDedup1--- PASS: TestDedup1 (0.00s)=== RUN   TestDedup1_with_dups--- PASS: TestDedup1_with_dups (0.00s)=== RUN   TestDedup2--- PASS: TestDedup2 (0.00s)=== RUN   TestDedup2_with_dups--- PASS: TestDedup2_with_dups (0.00s)goos: linuxgoarch: amd64pkg: test/d/dupBenchmarkDedup1_1000BenchmarkDedup1_1000-4       1764574           673 ns/op         544 B/op          2 allocs/opBenchmarkDedup2_1000BenchmarkDedup2_1000-4       7758907           152 ns/op           0 B/op          0 allocs/opPASSok      test/d/dup  3.224s4倍的改进！凉！下一步是什么？接下来是找到一个更好的算法，它产生更少的执行，更少的查找等等。不过，最后一个版本包含一个错误！你发现它了吗？请参阅此测试，它比另一个测试更好，因为它不依赖于随机数，而是依赖于具有强相等性检查的静态值。使用这些类型的测试，您可以定制输入以检查细粒度情况。func TestDedup2_static(t *testing.T) {    type expectation struct {        input []uint32        want  []uint32    }    expectations := []expectation{        expectation{            input: []uint32{0, 0, 1, 2, 3, 3, 3, 4, 4, 5},            want:  []uint32{0, 1, 2, 3, 4, 5},        },        expectation{            input: []uint32{0, 1, 2, 3, 3, 3, 4, 4, 5},            want:  []uint32{0, 1, 2, 3, 4, 5},        },    }    for _, e := range expectations {        res := dedup2(e.input)        if !reflect.DeepEqual(res, e.want) {            t.Fatalf("invlaid result, wanted=%#v\ngot=%#v\n", e.want, res)        }    }}它使用表驱动器测试，如 https://dave.cheney.net/2013/06/09/writing-table-driven-tests-in-go 中所述让我们解决这个问题：func dedup2(s []uint32) []uint32 {    if len(s) < 2 {        return s    }    var e int = 1    for i := 1; i < len(s); i++ {        if s[i] == s[i-1] {            continue        }        s[e] = s[i]        e++    }    return s[:e]}

删除已排序 Go 切片重复项的最快方法

2回答