为什么逐元素的加法在分开的循环中比在合并的循环中快得多?
设有四个数组 a1、b1、c1 和 d1,核心循环如下:
const int n = 100000;

for (int j = 0; j < n; j++) {
    a1[j] += b1[j];
    c1[j] += d1[j];
}
for (int j = 0; j < n; j++) {
    a1[j] += b1[j];
}
for (int j = 0; j < n; j++) {
    c1[j] += d1[j];
}

movsd xmm0,mmword ptr [edx+18h]
addsd xmm0,mmword ptr [ecx+20h]
movsd mmword ptr [ecx+20h],xmm0
movsd xmm0,mmword ptr [esi+10h]
addsd xmm0,mmword ptr [eax+30h]
movsd mmword ptr [eax+30h],xmm0
movsd xmm0,mmword ptr [edx+20h]
addsd xmm0,mmword ptr [ecx+28h]
movsd mmword ptr [ecx+28h],xmm0
movsd xmm0,mmword ptr [esi+18h]
addsd xmm0,mmword ptr [eax+38h]
addsd xmm0,mmword ptr [eax+28h]
movsd mmword ptr [eax+28h],xmm0
movsd xmm0,mmword ptr [ecx+20h]
addsd xmm0,mmword ptr [eax+30h]
movsd mmword ptr [eax+30h],xmm0
movsd xmm0,mmword ptr [ecx+28h]
addsd xmm0,mmword ptr [eax+38h]
movsd mmword ptr [eax+38h],xmm0
movsd xmm0,mmword ptr [ecx+30h]
addsd xmm0,mmword ptr [eax+40h]
movsd mmword ptr [eax+40h],xmm0
您能否详细说明,是什么导致了下图中五个区域所示的不同缓存行为?
如果能为不同的 CPU 提供类似的图表,从而指出各种 CPU/缓存体系结构之间的差异,也会很有意思。
翻翻过去那场雪
凤凰求蛊
猛跑小猪
随时随地看视频慕课网APP