我正在使用一台 8 核的机器(配备 2.8 GHz Intel Core i7 处理器的 Mac),运行 fmt.Println(runtime.NumCPU()) 可以确认这一点。
我实现了一个非常简单的工作池模型,用来并发处理进入池中的请求。处理类型是“CPU 密集型”,我想了解在给 Go 分配更多内核时性能会提高多少。
所以代码如下
// Run processes workSize requests on a pool of poolSize worker goroutines
// after calling runtime.GOMAXPROCS(maxCores).
//
// Each request runs a busy loop of loopSize iterations to simulate CPU-bound
// work. Run blocks until every request has been handled and all workers have
// exited. The GOMAXPROCS value that was in effect on entry is restored before
// Run returns, so the setting does not leak into the caller.
//
// A poolSize below 1 is treated as 1: with no worker the unbuffered send
// below would never find a receiver, and a negative count would make
// sync.WaitGroup panic. A maxCores below 1 leaves GOMAXPROCS unchanged.
func Run(poolSize int, workSize int, loopSize int, maxCores int) {
	prevProcs := runtime.GOMAXPROCS(maxCores)
	// Registered first so it runs last, after all workers have finished.
	defer runtime.GOMAXPROCS(prevProcs)

	if poolSize < 1 {
		poolSize = 1
	}

	// workStream carries the requests to be performed by the pool. It is
	// unbuffered, so the producer blocks until a worker is ready to receive.
	workStream := make(chan int)

	// cpuIntensiveWork simulates a CPU-intensive process. The computed value
	// is discarded; only the time spent in the loop matters here.
	cpuIntensiveWork := func(input int) {
		res := input
		for i := 0; i < loopSize; i++ {
			res = res + i
		}
	}

	// Launch the goroutines of the pool. Each worker drains workStream until
	// the channel is closed.
	var wg sync.WaitGroup
	wg.Add(poolSize)
	for i := 0; i < poolSize; i++ {
		go func() {
			defer wg.Done()
			for req := range workStream {
				cpuIntensiveWork(req)
			}
		}()
	}

	// Feed the workStream until the end, then close the channel so the
	// workers' range loops terminate.
	for workItemNo := 0; workItemNo < workSize; workItemNo++ {
		workStream <- workItemNo
	}
	close(workStream)

	wg.Wait()
}
基准测试如下:
// Workload settings shared by the Benchmark_* functions.
var (
	// numberOfWorkers is passed to Run as poolSize.
	numberOfWorkers = 100
	// numberOfRequests is passed to Run as workSize.
	numberOfRequests = 1000
	// loopSize is passed to Run as loopSize.
	loopSize = 100000
)
// Benchmark_1Core times Run on the shared workload with maxCores set to 1.
func Benchmark_1Core(b *testing.B) {
	const cores = 1
	for iter := 0; iter < b.N; iter++ {
		Run(numberOfWorkers, numberOfRequests, loopSize, cores)
	}
}
// Benchmark_2Cores times Run on the shared workload with maxCores set to 2.
func Benchmark_2Cores(b *testing.B) {
	const cores = 2
	for iter := 0; iter < b.N; iter++ {
		Run(numberOfWorkers, numberOfRequests, loopSize, cores)
	}
}
// Benchmark_4Cores times Run on the shared workload with maxCores set to 4.
func Benchmark_4Cores(b *testing.B) {
	const cores = 4
	for iter := 0; iter < b.N; iter++ {
		Run(numberOfWorkers, numberOfRequests, loopSize, cores)
	}
}
// Benchmark_8Cores times Run on the shared workload with maxCores set to 8.
func Benchmark_8Cores(b *testing.B) {
	const cores = 8
	for iter := 0; iter < b.N; iter++ {
		Run(numberOfWorkers, numberOfRequests, loopSize, cores)
	}
}
运行基准测试时我注意到,从 1 核到 2 核再到 4 核,性能几乎呈线性增长;但是从 4 核到 8 核,性能提升非常有限。
这是预期的行为吗?如果是这样,根本原因是什么?
一只名叫tom的猫
相关分类