删除已排序 Go 切片重复项的最快方法

 mySlice := make([]uint32, 0, 4294967290)


// ...


        // Sort the slice

        sort.Slice(mySlice, func(i, j int) bool {

            x := mySlice[i]

            y := mySlice[j]

            return x < y

        })

删除切片重复项的最快方法是什么?


如何利用切片已排序的事实?


更新

我想出了这个:


func RemoveDuplicates(s []uint32) {

    if len(s) < 2 {

        return

    }

    tmp := make([]uint32, 0, len(s))


    for i := uint32(0); i < uint32(len(s)); i++ {

        // If current is not equal to next then store the current

        if s[i] != s[i+1] {

            tmp = append(tmp, s[i])

        }

    }


    // The last must be stored

    // Note that if it was repeated, the duplicates are NOT stored before

    tmp = append(tmp, s[len(s)-1])


    // Modify original slice

    s = nil

    s = append(s, tmp...)

}

有什么错误吗?有什么错误吗?有什么方法可以改进吗?


更新

如@mh-cbon 所指出的,正确的循环最大值应为:i < uint32(len(s)) - 1


for i := uint32(0); i < uint32(len(s)) - 1; i++ {


蓝山帝景
浏览 97回答 2
2回答

隔江千里

不是关于最快的答案,而是关于使用Go语言来优化一段代码的方法的逐步回答。有关什么是最快的非常正式的见解,请参阅&nbsp;https://stackoverflow.com/a/6072100/4466350您的代码有问题。始终编写测试。一、让我们用写一个主package mainimport (&nbsp; &nbsp; "math/rand"&nbsp; &nbsp; "sort")func main() {}func randSlice(max int) (ret []uint32) {&nbsp; &nbsp; // we should check that max does not exceed maxUINT32&nbsp; &nbsp; ret = make([]uint32, 0, max)&nbsp; &nbsp; r := rand.New(rand.NewSource(99))&nbsp; &nbsp; for i := 0; i < max; i++ {&nbsp; &nbsp; &nbsp; &nbsp; ret = append(ret, uint32(r.Intn(max)))&nbsp; &nbsp; }&nbsp; &nbsp; sort.Slice(ret, func(i, j int) bool {&nbsp; &nbsp; &nbsp; &nbsp; return ret[i] < ret[j]&nbsp; &nbsp; })&nbsp; &nbsp; return}func dedup1(s []uint32) []uint32 {&nbsp; &nbsp; if len(s) < 2 {&nbsp; &nbsp; &nbsp; &nbsp; return s&nbsp; &nbsp; }&nbsp; &nbsp; tmp := make([]uint32, 0, len(s))&nbsp; &nbsp; for i := uint32(0); i < uint32(len(s)); i++ {&nbsp; &nbsp; &nbsp; &nbsp; // If current is not equal to next then store the current&nbsp; &nbsp; &nbsp; &nbsp; if s[i] != s[i+1] {&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; tmp = append(tmp, s[i])&nbsp; &nbsp; &nbsp; &nbsp; }&nbsp; &nbsp; }&nbsp; &nbsp; // The last must be stored&nbsp; &nbsp; // Note that if it was repeated, the duplicates are NOT stored before&nbsp; &nbsp; tmp = append(tmp, s[len(s)-1])&nbsp; &nbsp; // Modify original slice&nbsp; &nbsp; s = nil&nbsp; &nbsp; s = append(s, tmp...)&nbsp; &nbsp; return s}以及随附的测试package mainimport "testing"func TestDedup1(t *testing.T) {&nbsp; &nbsp; s := randSlice(10)&nbsp; &nbsp; res := dedup1(s)&nbsp; &nbsp; uniq := map[uint32]bool{}&nbsp; &nbsp; for _, r := range res {&nbsp; &nbsp; &nbsp; &nbsp; _, ok := uniq[r]&nbsp; &nbsp; &nbsp; &nbsp; if ok {&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; t.Fatalf("found duplicates\ninput=%#v\nresult=%#v\n", s, res)&nbsp; &nbsp; &nbsp; &nbsp; }&nbsp; &nbsp; &nbsp; &nbsp; uniq[r] = true&nbsp; &nbsp; }}运行这个我们得到$ go test -v .&nbsp;=== RUN&nbsp; &nbsp;TestDedup1--- FAIL: TestDedup1 (0.00s)panic: runtime error: index out of range [10] with length 10 [recovered]&nbsp; &nbsp; panic: runtime error: index out of range [10] with length 10goroutine 18 [running]:testing.tRunner.func1.1(0x536680, 0xc0000da040)&nbsp; &nbsp; /home/mh-cbon/.gvm/gos/go1.15.2/src/testing/testing.go:1076 +0x30dtesting.tRunner.func1(0xc000082600)&nbsp; &nbsp; /home/mh-cbon/.gvm/gos/go1.15.2/src/testing/testing.go:1079 +0x41apanic(0x536680, 0xc0000da040)&nbsp; &nbsp; /home/mh-cbon/.gvm/gos/go1.15.2/src/runtime/panic.go:969 +0x175test/d/dup.dedup1(0xc000094060, 0xa, 0xa, 0xa, 0x6124a0, 0xc00003c770)&nbsp; &nbsp; /home/mh-cbon/gow/src/test/d/dup/main.go:32 +0x248test/d/dup.TestDedup1(0xc000082600)&nbsp; &nbsp; /home/mh-cbon/gow/src/test/d/dup/main_test.go:7 +0x70testing.tRunner(0xc000082600, 0x54fbf0)&nbsp; &nbsp; /home/mh-cbon/.gvm/gos/go1.15.2/src/testing/testing.go:1127 +0xefcreated by testing.(*T).Run&nbsp; &nbsp; /home/mh-cbon/.gvm/gos/go1.15.2/src/testing/testing.go:1178 +0x386FAIL&nbsp; &nbsp; test/d/dup&nbsp; 0.006sFAIL我们通过适当地检查切片边界来解决此问题。在 中,将此条件更改为 ,或者更好的是,将迭代最大值减少 1dedup1if s[i] != s[i+1] {if i+1 < uint32(len(s)) && s[i] != s[i+1] {for i := uint32(0); i < uint32(len(s))-1; i++ {接下来,编写一个函数来生成具有随机重复项的切片。func randSliceWithDups(max int) (ret []uint32) {&nbsp; &nbsp; ret = randSlice(max / 2)&nbsp; &nbsp; ret = append(ret, ret...)&nbsp; &nbsp; sort.Slice(ret, func(i, j int) bool {&nbsp; &nbsp; &nbsp; &nbsp; return ret[i] < ret[j]&nbsp; &nbsp; })&nbsp; &nbsp; return}写入获取切片,例如randSliceWithDups(50)[0 0 1 1 2 2 3 3 3 3 4 4 5 5 5 5 6 6 6 6 7 7 8 8 9 9 9 9 12 12 13 13 18 18 18 18 18 18 19 19 20 20 20 20 21 21 22 22 24 24]再次更新测试func TestDedup1_with_dups(t *testing.T) {&nbsp; &nbsp; s := randSliceWithDups(10)&nbsp; &nbsp; res := dedup1(s)&nbsp; &nbsp; uniq := map[uint32]bool{}&nbsp; &nbsp; for _, r := range res {&nbsp; &nbsp; &nbsp; &nbsp; _, ok := uniq[r]&nbsp; &nbsp; &nbsp; &nbsp; if ok {&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; t.Fatalf("found duplicates\ninput=%#v\nresult=%#v\n", s, res)&nbsp; &nbsp; &nbsp; &nbsp; }&nbsp; &nbsp; &nbsp; &nbsp; uniq[r] = true&nbsp; &nbsp; }}接下来,添加一个基准测试;它将帮助我们发现性能问题并随着时间的推移保持性能。func BenchmarkDedup1_1000(b *testing.B) {&nbsp; &nbsp; s := randSliceWithDups(100)&nbsp; &nbsp; b.ResetTimer()&nbsp; &nbsp; b.ReportAllocs()&nbsp; &nbsp; for i := 0; i < b.N; i++ {&nbsp; &nbsp; &nbsp; &nbsp; _ = dedup1(s)&nbsp; &nbsp; }}运行这个我们得到:$ go test -v . -bench=.=== RUN&nbsp; &nbsp;TestDedup1--- PASS: TestDedup1 (0.00s)=== RUN&nbsp; &nbsp;TestDedup1_with_dups--- PASS: TestDedup1_with_dups (0.00s)goos: linuxgoarch: amd64pkg: test/d/dupBenchmarkDedup1_1000BenchmarkDedup1_1000-4&nbsp; &nbsp; &nbsp; &nbsp; 172087&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 6353 ns/op&nbsp; &nbsp; &nbsp; &nbsp; 5504 B/op&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 2 allocs/opPASSok&nbsp; &nbsp; &nbsp; test/d/dup&nbsp; 1.174s让我们陈述一下,每个人都发现阅读你的初始代码,甚至没有编写你正在分配的基准测试。这就提出了一个问题,即确定是否允许您就地修改输入切片。如果您可以将其更改到位,我们可能会利用这一点来防止该分配并加快您的程序。一种从头开始编写的解决方案(考虑在网站等极客网站上搜索普遍接受的解决方案),是迭代切片并维护下一个要写入的位置的索引。找到非重复项时,将非重复项保存到此最后一个位置,然后将该索引递增 1。最后,将切片向上返回为递增索引的值。func dedup2(s []uint32) []uint32 {&nbsp; &nbsp; if len(s) < 2 {&nbsp; &nbsp; &nbsp; &nbsp; return s&nbsp; &nbsp; }&nbsp; &nbsp; var e int&nbsp; &nbsp; for i := 1; i < len(s); i++ {&nbsp; &nbsp; &nbsp; &nbsp; if s[i] == s[i-1] {&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; continue&nbsp; &nbsp; &nbsp; &nbsp; }&nbsp; &nbsp; &nbsp; &nbsp; s[e] = s[i]&nbsp; &nbsp; &nbsp; &nbsp; e++&nbsp; &nbsp; }&nbsp; &nbsp; return s[:e]}同样,添加测试和工作台,并检查结果。func TestDedup2(t *testing.T) {&nbsp; &nbsp; s := randSlice(10)&nbsp; &nbsp; res := dedup2(s)&nbsp; &nbsp; uniq := map[uint32]bool{}&nbsp; &nbsp; for _, r := range res {&nbsp; &nbsp; &nbsp; &nbsp; _, ok := uniq[r]&nbsp; &nbsp; &nbsp; &nbsp; if ok {&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; t.Fatalf("found duplicates\ninput=%#v\nresult=%#v\n", s, res)&nbsp; &nbsp; &nbsp; &nbsp; }&nbsp; &nbsp; &nbsp; &nbsp; uniq[r] = true&nbsp; &nbsp; }}func TestDedup2_with_dups(t *testing.T) {&nbsp; &nbsp; s := randSliceWithDups(10)&nbsp; &nbsp; res := dedup2(s)&nbsp; &nbsp; uniq := map[uint32]bool{}&nbsp; &nbsp; for _, r := range res {&nbsp; &nbsp; &nbsp; &nbsp; _, ok := uniq[r]&nbsp; &nbsp; &nbsp; &nbsp; if ok {&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; t.Fatalf("found duplicates\ninput=%#v\nresult=%#v\n", s, res)&nbsp; &nbsp; &nbsp; &nbsp; }&nbsp; &nbsp; &nbsp; &nbsp; uniq[r] = true&nbsp; &nbsp; }}func BenchmarkDedup2_1000(b *testing.B) {&nbsp; &nbsp; s := randSliceWithDups(100)&nbsp; &nbsp; b.ResetTimer()&nbsp; &nbsp; b.ReportAllocs()&nbsp; &nbsp; for i := 0; i < b.N; i++ {&nbsp; &nbsp; &nbsp; &nbsp; _ = dedup2(s)&nbsp; &nbsp; }}其产生:$ go test -v . -bench=.=== RUN&nbsp; &nbsp;TestDedup1--- PASS: TestDedup1 (0.00s)=== RUN&nbsp; &nbsp;TestDedup1_with_dups--- PASS: TestDedup1_with_dups (0.00s)=== RUN&nbsp; &nbsp;TestDedup2--- PASS: TestDedup2 (0.00s)=== RUN&nbsp; &nbsp;TestDedup2_with_dups--- PASS: TestDedup2_with_dups (0.00s)goos: linuxgoarch: amd64pkg: test/d/dupBenchmarkDedup1_1000BenchmarkDedup1_1000-4&nbsp; &nbsp; &nbsp; &nbsp;1764574&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;673 ns/op&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;544 B/op&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 2 allocs/opBenchmarkDedup2_1000BenchmarkDedup2_1000-4&nbsp; &nbsp; &nbsp; &nbsp;7758907&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;152 ns/op&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;0 B/op&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 0 allocs/opPASSok&nbsp; &nbsp; &nbsp; test/d/dup&nbsp; 3.224s4倍的改进!凉!下一步是什么?接下来是找到一个更好的算法,它产生更少的执行,更少的查找等等。不过,最后一个版本包含一个错误!你发现它了吗?请参阅此测试,它比另一个测试更好,因为它不依赖于随机数,而是依赖于具有强相等性检查的静态值。使用这些类型的测试,您可以定制输入以检查细粒度情况。func TestDedup2_static(t *testing.T) {&nbsp; &nbsp; type expectation struct {&nbsp; &nbsp; &nbsp; &nbsp; input []uint32&nbsp; &nbsp; &nbsp; &nbsp; want&nbsp; []uint32&nbsp; &nbsp; }&nbsp; &nbsp; expectations := []expectation{&nbsp; &nbsp; &nbsp; &nbsp; expectation{&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; input: []uint32{0, 0, 1, 2, 3, 3, 3, 4, 4, 5},&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; want:&nbsp; []uint32{0, 1, 2, 3, 4, 5},&nbsp; &nbsp; &nbsp; &nbsp; },&nbsp; &nbsp; &nbsp; &nbsp; expectation{&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; input: []uint32{0, 1, 2, 3, 3, 3, 4, 4, 5},&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; want:&nbsp; []uint32{0, 1, 2, 3, 4, 5},&nbsp; &nbsp; &nbsp; &nbsp; },&nbsp; &nbsp; }&nbsp; &nbsp; for _, e := range expectations {&nbsp; &nbsp; &nbsp; &nbsp; res := dedup2(e.input)&nbsp; &nbsp; &nbsp; &nbsp; if !reflect.DeepEqual(res, e.want) {&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; t.Fatalf("invlaid result, wanted=%#v\ngot=%#v\n", e.want, res)&nbsp; &nbsp; &nbsp; &nbsp; }&nbsp; &nbsp; }}它使用表驱动器测试,如 https://dave.cheney.net/2013/06/09/writing-table-driven-tests-in-go 中所述让我们解决这个问题:func dedup2(s []uint32) []uint32 {&nbsp; &nbsp; if len(s) < 2 {&nbsp; &nbsp; &nbsp; &nbsp; return s&nbsp; &nbsp; }&nbsp; &nbsp; var e int = 1&nbsp; &nbsp; for i := 1; i < len(s); i++ {&nbsp; &nbsp; &nbsp; &nbsp; if s[i] == s[i-1] {&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; continue&nbsp; &nbsp; &nbsp; &nbsp; }&nbsp; &nbsp; &nbsp; &nbsp; s[e] = s[i]&nbsp; &nbsp; &nbsp; &nbsp; e++&nbsp; &nbsp; }&nbsp; &nbsp; return s[:e]}

炎炎设计

要删除切片的重复元素,您可以创建一个映射并将切片值指定为映射的键,然后循环访问映射并将键值附加到新切片。下面是相同逻辑的代码:package mainimport (&nbsp; &nbsp; "fmt"&nbsp; &nbsp; "sort")func removeDupe(slc []int) []int {&nbsp; &nbsp; var tmpSlice []int&nbsp; &nbsp; chkMap := make(map[int]bool)&nbsp; &nbsp; for _, val := range slc {&nbsp; &nbsp; &nbsp; &nbsp; chkMap[val] = true&nbsp; &nbsp; }&nbsp; &nbsp; for k, _ := range chkMap {&nbsp; &nbsp; &nbsp; &nbsp; tmpSlice = append(tmpSlice, k)&nbsp; &nbsp; }&nbsp; &nbsp; sort.Ints(tmpSlice)&nbsp; &nbsp; return tmpSlice}func main() {&nbsp; &nbsp; mySlice := []int{1, 2, 3, 4, 5, 1, 2, 3, 9, 0}&nbsp; &nbsp; formattedSlice := removeDupe(mySlice)&nbsp; &nbsp; fmt.Println(formattedSlice)}&nbsp;输出:[0 1 2 3 4 5 9]
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Go