python list iterate, concat 性能问题

我通过两种方式编写了一个简单的插入排序函数。


def binary_search(the_array, item, start, end):

    if start == end:

        if the_array[start] > item:

            return start

        else:

            return start + 1

    if start > end:

        return start


    mid = round((start + end)/ 2)


    if the_array[mid] < item:

        return binary_search(the_array, item, mid + 1, end)


    elif the_array[mid] > item:

        return binary_search(the_array, item, start, mid - 1)


    else:

        return mid


def insertion_sort(the_array):

    l = len(the_array)

    start = time.process_time()   

    for index in range(1, l):

        value = the_array[index]

        pos = binary_search(the_array, value, 0, index - 1)

        #Way A:

        #for p in range(index,pos,-1):

        #    the_array[p] = the_array[p-1]   

        #the_array[pos] = value

        #Way B:

        #the_array = the_array[:pos] + [value] + the_array[pos:index] + the_array[index+1:]

    end = time.process_time()

    print("Cost time:",end-start)

    return the_array

A:


 for p in range(index,pos,-1):

     the_array[p] = the_array[p-1]   

 the_array[pos] = value

乙:


the_array = the_array[:pos] + [value] + the_array[pos:index] + the_array[index+1:]

我不擅长python 。在我的性能测试中,B方式比A方式快。我使用python time.process_time来获取时间偏移。


那么问题为什么B方式比A方式快? 请帮助我。谢谢


更新:我使用 10000 个随机整数来测试 A 和 B。


for x in range(0,10000):

    data.append(random.randint(0,10000))

A路花费2.3125s B路花费0.890625s。


一天后,没有答案告诉我为什么,所以我决定阅读一本关于此的书。在“高性能 Python”中,我找到了原因的答案!如果你想知道你可以看到我自己的答案。


绝地无双
浏览 328回答 2
2回答

HUH函数

为什么迭代范围比列表连接慢?它有以下两个原因:在python中迭代一个范围不能执行自动向量化。操作一一执行。CPU 浪费时间等待内存操作。Python list 存储的是数据的指针而不是数据本身。所以每次我们通过索引访问数据时,我们都会执行额外的间接操作。它不是缓存友好的。所以虽然concatenate list会分配更多的内存,但它是完整地操作内存而不是一个一个。CPU不会浪费时间等待内存操作。我找到了一种Pythonic方式来完成这项工作。the_array[pos+1:index+1] = the_array[pos:index] the_array[pos] = value它比 A 和 B 快。而且仍然很容易理解。

富国沪深

Python 是一种非常高级的解释性语言。作为简单性和可读性的交换,像迭代range生成器这样的琐碎任务可能会增加可感知的开销。相比之下,列表理解和切片以高性能实现。尽管它们仅相差一个常数因子,但实际上您可以变得更快:import randomimport time&nbsp;def binary_search(the_array, item, start, end):&nbsp; &nbsp; if start == end:&nbsp; &nbsp; &nbsp; &nbsp; if the_array[start] > item:&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; return start&nbsp; &nbsp; &nbsp; &nbsp; else:&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; return start + 1&nbsp; &nbsp; if start > end:&nbsp; &nbsp; &nbsp; &nbsp; return start&nbsp; &nbsp; mid = round((start + end)/ 2)&nbsp; &nbsp; if the_array[mid] < item:&nbsp; &nbsp; &nbsp; &nbsp; return binary_search(the_array, item, mid + 1, end)&nbsp; &nbsp; elif the_array[mid] > item:&nbsp; &nbsp; &nbsp; &nbsp; return binary_search(the_array, item, start, mid - 1)&nbsp; &nbsp; else:&nbsp; &nbsp; &nbsp; &nbsp; return middef insertion_sort_a(the_array):&nbsp; &nbsp; l = len(the_array)&nbsp; &nbsp; start = time.process_time()&nbsp; &nbsp;&nbsp; &nbsp; for index in range(1, l):&nbsp; &nbsp; &nbsp; &nbsp; value = the_array[index]&nbsp; &nbsp; &nbsp; &nbsp; pos = binary_search(the_array, value, 0, index - 1)&nbsp; &nbsp; &nbsp; &nbsp; for p in range(index,pos,-1):&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;the_array[p] = the_array[p-1]&nbsp; &nbsp;&nbsp; &nbsp; &nbsp; &nbsp; the_array[pos] = value&nbsp; &nbsp; end = time.process_time()&nbsp; &nbsp; print("Cost time:",end-start,end="\t")&nbsp; &nbsp; return the_arraydef insertion_sort_b(the_array):&nbsp; &nbsp; l = len(the_array)&nbsp; &nbsp; start = time.process_time()&nbsp; &nbsp;&nbsp; &nbsp; for index in range(1, l):&nbsp; &nbsp; &nbsp; &nbsp; value = the_array[index]&nbsp; &nbsp; &nbsp; &nbsp; pos = binary_search(the_array, value, 0, index - 1)&nbsp; &nbsp; &nbsp; &nbsp; the_array = the_array[:pos] + [value] + the_array[pos:index] + the_array[index+1:]&nbsp; &nbsp; end = time.process_time()&nbsp; &nbsp; print(end-start, end="\t")&nbsp; &nbsp; return the_arraydef insertion_sort_c(the_array):&nbsp; &nbsp; l = len(the_array)&nbsp; &nbsp; start = time.process_time()&nbsp; &nbsp;&nbsp; &nbsp; for index in range(1, l):&nbsp; &nbsp; &nbsp; &nbsp; value = the_array[index]&nbsp; &nbsp; &nbsp; &nbsp; while index > 0 and the_array[index-1] > value:&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; the_array[index] = the_array[index-1]&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; index -= 1&nbsp; &nbsp; &nbsp; &nbsp; the_array[index] = value&nbsp; &nbsp; end = time.process_time()&nbsp; &nbsp; print(end-start, end="\t")&nbsp; &nbsp; return the_arraydef insertion_sort_d(the_array):&nbsp; &nbsp; l = len(the_array)&nbsp; &nbsp; start = time.process_time()&nbsp; &nbsp;&nbsp; &nbsp; for index in range(1, l):&nbsp; &nbsp; &nbsp; &nbsp; value = the_array[index]&nbsp; &nbsp; &nbsp; &nbsp; pos = binary_search(the_array, value, 0, index - 1)&nbsp; &nbsp; &nbsp; &nbsp; the_array[pos+1:index+1] = the_array[pos:index]&nbsp; &nbsp; &nbsp; &nbsp; the_array[pos] = value&nbsp; &nbsp; end = time.process_time()&nbsp; &nbsp; print(end-start)&nbsp; &nbsp; return the_arrayfor n in range(20):&nbsp; &nbsp; n = 2**n&nbsp; &nbsp; data = []&nbsp; &nbsp; for x in range(0,n):&nbsp; &nbsp; &nbsp; &nbsp; data.append(random.randint(0,n))&nbsp; &nbsp; a = insertion_sort_a(list(data))&nbsp; &nbsp; assert all(a[i] <= a[i+1] for i in range(len(a)-1))&nbsp; &nbsp; b = insertion_sort_b(list(data))&nbsp; &nbsp; assert all(b[i] <= b[i+1] for i in range(len(b)-1))&nbsp; &nbsp; c = insertion_sort_c(list(data))&nbsp; &nbsp; assert all(c[i] <= c[i+1] for i in range(len(c)-1))&nbsp; &nbsp; d = insertion_sort_d(list(data))&nbsp; &nbsp; assert all(d[i] <= d[i+1] for i in range(len(d)-1))&nbsp; &nbsp; assert a == b&nbsp; &nbsp; assert b == c&nbsp; &nbsp; assert c == d
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Python