gpt4 book ai didi

go - 在 Go 中测量 FLOPS

转载 作者:IT王子 更新时间:2023-10-29 01:27:11 26 4
gpt4 key购买 nike

我想编写一个 go 程序来对我的 CPU 进行基准测试并计算出我的笔记本电脑的 GFLOPS。

// benchmarkFlopTime times num_operations loop iterations one at a time and
// prints the mean elapsed time per iteration.
//
// Every iteration is bracketed by its own time.Now/time.Since pair, and the
// elapsed duration is appended to timeArray, so the slice grows to
// num_operations entries. The mean is derived from sumTimeArray, which is not
// part of this snippet (presumably it adds up the recorded durations).
//
// NOTE(review): the printed figure likely reflects the cost of the timer calls
// far more than the cost of a floating-point operation — confirm by timing the
// whole loop once instead of each iteration.
func benchmarkFlopTime(){
num_operations := int(100000000)
var timeArray[] time.Duration;

var result float64
for i:=0; i < num_operations; i++ {
t1 := time.Now()
// This is the only statement inside the timed region. 1.0 + 312.232 is a
// constant expression, which Go evaluates at compile time, so what runs here
// is an assignment of a constant rather than a run-time floating-point add.
result = 1.0 + 312.232
elapsed := time.Since(t1)
timeArray = append(timeArray, elapsed)
// Executed after elapsed has been taken, so it is not part of the measurement.
result += 1.0
}

// Mean per-iteration time: total recorded duration divided by the iteration count.
fmt.Println("Result (ns):", float64(sumTimeArray(timeArray))/float64(time.Duration(num_operations)))
}

Output 1: Result (ns): 9.99604753ns

解释:

这段代码得出的结果大约是 0.1 GFLOPS((1 s / 10 ns) / 10^9)。我知道我的 CPU 每个周期可以执行 8 次操作,而我每次只执行了一次,所以还可以乘上一个因子 8,也就是大约 1 GFLOPS。

问题:

  • 不过,我的 2.5GHZ i7、8 核笔记本电脑的理论 GFLOPS 数应该是:8*2.5*8 = 160GFLOPS。我远远低于这个结果。

需要说明的是,我在编译时使用 go install -gcflags '-N -l' github.com/golang/cpu-benchmark 关闭了优化。

该如何解释这个远低于理论值的结果? 用 Go 能测量出 FLOPS 吗?

最佳答案

这是一个(简单的)Go 基准测试,它对一个包含二十次浮点运算的循环进行计时。

package main

import (
"fmt"
"math"
"runtime"
"testing"
"time"
)

var (
f float64
e float64 = math.E
pi float64 = math.Pi
)

const nFlop = 20 // benchmarkFloatOps

// benchmarkFloatOps runs n loop iterations, each performing twenty float64
// operations: the sequence add, subtract, multiply, divide applied to the
// package-level operands pi and e, repeated five times. Every result is
// stored to the package-level variable f.
//
// The number of statements in the loop body must stay in step with the nFlop
// constant, which main passes to Results to convert elapsed time into ns/op.
func benchmarkFloatOps(n int) {
for i := 0; i < n; i++ {
// Group 1 of 5: +, -, *, /
f = pi + e
f = pi - e
f = pi * e
f = pi / e
// Group 2 of 5
f = pi + e
f = pi - e
f = pi * e
f = pi / e
// Group 3 of 5
f = pi + e
f = pi - e
f = pi * e
f = pi / e
// Group 4 of 5
f = pi + e
f = pi - e
f = pi * e
f = pi / e
// Group 5 of 5
f = pi + e
f = pi - e
f = pi * e
f = pi / e
}
}

// BenchmarkFloatOps adapts benchmarkFloatOps to the func(*testing.B) signature
// expected by the testing package, running the workload for the b.N
// iterations the harness has selected.
func BenchmarkFloatOps(b *testing.B) {
	iterations := b.N
	benchmarkFloatOps(iterations)
}

// Results prints a one-line summary of a timed run to standard output: the
// elapsed time t, the total operation count (nFlop operations in each of n
// iterations), and the average time per operation.
func Results(t time.Duration, n int, nFlop int) {
	totalOps := nFlop * n
	// A time.Duration is a nanosecond count, so converting it to float64 and
	// dividing by the operation count yields nanoseconds per operation.
	nsPerOp := float64(t) / float64(totalOps)
	fmt.Println(t, totalOps, "ops", nsPerOp, "ns/op")
}

// main times the same floating-point workload twice and prints one Results
// line for each run: first through the testing package's benchmark harness,
// then with a hand-rolled wall-clock measurement over the same iteration count.
func main() {
	// Limit Go code to executing on one CPU at a time for both measurements.
	runtime.GOMAXPROCS(1)

	// Run 1: the testing harness picks the iteration count (N) and reports
	// the elapsed time (T).
	harness := testing.Benchmark(BenchmarkFloatOps)
	Results(harness.T, harness.N, nFlop)

	// Run 2: repeat exactly the harness's iteration count, timed by hand.
	iterations := harness.N
	began := time.Now()
	benchmarkFloatOps(iterations)
	Results(time.Since(began), iterations, nFlop)
}

输出(Intel i7-6700 CPU,3.40GHz):

1.296967371s 4000000000 ops 0.32424184275 ns/op
1.299078813s 4000000000 ops 0.32476970325 ns/op

benchmarkFloatOps 的伪汇编代码:

$ go tool compile -S flops.go

"".benchmarkFloatOps t=1 size=592 value=0 args=0x8 locals=0x0
0x0000 00000 (flops.go:19) TEXT "".benchmarkFloatOps(SB), $0-8
0x0000 00000 (flops.go:19) NOP
0x0000 00000 (flops.go:19) NOP
0x0000 00000 (flops.go:19) MOVQ "".n+8(FP), CX
0x0005 00005 (flops.go:19) FUNCDATA $0, gclocals·5184031d3a32a42d85027f073f873668(SB)
0x0005 00005 (flops.go:19) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0005 00005 (flops.go:20) MOVQ $0, AX
0x0007 00007 (flops.go:20) CMPQ AX, CX
0x000a 00010 (flops.go:20) JGE $0, 588
0x0010 00016 (flops.go:21) MOVSD "".pi(SB), X0
0x0018 00024 (flops.go:21) MOVSD "".e(SB), X1
0x0020 00032 (flops.go:21) ADDSD X1, X0
0x0024 00036 (flops.go:21) MOVSD X0, "".f(SB)
0x002c 00044 (flops.go:22) MOVSD "".pi(SB), X0
0x0034 00052 (flops.go:22) MOVSD "".e(SB), X1
0x003c 00060 (flops.go:22) SUBSD X1, X0
0x0040 00064 (flops.go:22) MOVSD X0, "".f(SB)
0x0048 00072 (flops.go:23) MOVSD "".pi(SB), X0
0x0050 00080 (flops.go:23) MOVSD "".e(SB), X1
0x0058 00088 (flops.go:23) MULSD X1, X0
0x005c 00092 (flops.go:23) MOVSD X0, "".f(SB)
0x0064 00100 (flops.go:24) MOVSD "".pi(SB), X0
0x006c 00108 (flops.go:24) MOVSD "".e(SB), X1
0x0074 00116 (flops.go:24) DIVSD X1, X0
0x0078 00120 (flops.go:24) MOVSD X0, "".f(SB)
0x0080 00128 (flops.go:25) MOVSD "".pi(SB), X0
0x0088 00136 (flops.go:25) MOVSD "".e(SB), X1
0x0090 00144 (flops.go:25) ADDSD X1, X0
0x0094 00148 (flops.go:25) MOVSD X0, "".f(SB)
0x009c 00156 (flops.go:26) MOVSD "".pi(SB), X0
0x00a4 00164 (flops.go:26) MOVSD "".e(SB), X1
0x00ac 00172 (flops.go:26) SUBSD X1, X0
0x00b0 00176 (flops.go:26) MOVSD X0, "".f(SB)
0x00b8 00184 (flops.go:27) MOVSD "".pi(SB), X0
0x00c0 00192 (flops.go:27) MOVSD "".e(SB), X1
0x00c8 00200 (flops.go:27) MULSD X1, X0
0x00cc 00204 (flops.go:27) MOVSD X0, "".f(SB)
0x00d4 00212 (flops.go:28) MOVSD "".pi(SB), X0
0x00dc 00220 (flops.go:28) MOVSD "".e(SB), X1
0x00e4 00228 (flops.go:28) DIVSD X1, X0
0x00e8 00232 (flops.go:28) MOVSD X0, "".f(SB)
0x00f0 00240 (flops.go:29) MOVSD "".pi(SB), X0
0x00f8 00248 (flops.go:29) MOVSD "".e(SB), X1
0x0100 00256 (flops.go:29) ADDSD X1, X0
0x0104 00260 (flops.go:29) MOVSD X0, "".f(SB)
0x010c 00268 (flops.go:30) MOVSD "".pi(SB), X0
0x0114 00276 (flops.go:30) MOVSD "".e(SB), X1
0x011c 00284 (flops.go:30) SUBSD X1, X0
0x0120 00288 (flops.go:30) MOVSD X0, "".f(SB)
0x0128 00296 (flops.go:31) MOVSD "".pi(SB), X0
0x0130 00304 (flops.go:31) MOVSD "".e(SB), X1
0x0138 00312 (flops.go:31) MULSD X1, X0
0x013c 00316 (flops.go:31) MOVSD X0, "".f(SB)
0x0144 00324 (flops.go:32) MOVSD "".pi(SB), X0
0x014c 00332 (flops.go:32) MOVSD "".e(SB), X1
0x0154 00340 (flops.go:32) DIVSD X1, X0
0x0158 00344 (flops.go:32) MOVSD X0, "".f(SB)
0x0160 00352 (flops.go:33) MOVSD "".pi(SB), X0
0x0168 00360 (flops.go:33) MOVSD "".e(SB), X1
0x0170 00368 (flops.go:33) ADDSD X1, X0
0x0174 00372 (flops.go:33) MOVSD X0, "".f(SB)
0x017c 00380 (flops.go:34) MOVSD "".pi(SB), X0
0x0184 00388 (flops.go:34) MOVSD "".e(SB), X1
0x018c 00396 (flops.go:34) SUBSD X1, X0
0x0190 00400 (flops.go:34) MOVSD X0, "".f(SB)
0x0198 00408 (flops.go:35) MOVSD "".pi(SB), X0
0x01a0 00416 (flops.go:35) MOVSD "".e(SB), X1
0x01a8 00424 (flops.go:35) MULSD X1, X0
0x01ac 00428 (flops.go:35) MOVSD X0, "".f(SB)
0x01b4 00436 (flops.go:36) MOVSD "".pi(SB), X0
0x01bc 00444 (flops.go:36) MOVSD "".e(SB), X1
0x01c4 00452 (flops.go:36) DIVSD X1, X0
0x01c8 00456 (flops.go:36) MOVSD X0, "".f(SB)
0x01d0 00464 (flops.go:37) MOVSD "".pi(SB), X0
0x01d8 00472 (flops.go:37) MOVSD "".e(SB), X1
0x01e0 00480 (flops.go:37) ADDSD X1, X0
0x01e4 00484 (flops.go:37) MOVSD X0, "".f(SB)
0x01ec 00492 (flops.go:38) MOVSD "".pi(SB), X0
0x01f4 00500 (flops.go:38) MOVSD "".e(SB), X1
0x01fc 00508 (flops.go:38) SUBSD X1, X0
0x0200 00512 (flops.go:38) MOVSD X0, "".f(SB)
0x0208 00520 (flops.go:39) MOVSD "".pi(SB), X0
0x0210 00528 (flops.go:39) MOVSD "".e(SB), X1
0x0218 00536 (flops.go:39) MULSD X1, X0
0x021c 00540 (flops.go:39) MOVSD X0, "".f(SB)
0x0224 00548 (flops.go:40) MOVSD "".pi(SB), X0
0x022c 00556 (flops.go:40) MOVSD "".e(SB), X1
0x0234 00564 (flops.go:40) DIVSD X1, X0
0x0238 00568 (flops.go:40) MOVSD X0, "".f(SB)
0x0240 00576 (flops.go:20) INCQ AX
0x0243 00579 (flops.go:20) NOP
0x0243 00579 (flops.go:20) CMPQ AX, CX
0x0246 00582 (flops.go:20) JLT $0, 16
0x024c 00588 (flops.go:42) RET

benchmarkFloatOps 的目标代码:

$ go build flops.go && go tool objdump -s benchmarkFloatOps ./flops

TEXT main.benchmarkFloatOps(SB) /home/peter/Dropbox/gopath/src/so/benchmark/flops.go
flops.go:19 0x401000 488b4c2408 MOVQ 0x8(SP), CX
flops.go:20 0x401005 31c0 XORL AX, AX
flops.go:20 0x401007 4839c8 CMPQ CX, AX
flops.go:20 0x40100a 0f8d3c020000 JGE 0x40124c
flops.go:21 0x401010 f20f1005e8801b00 REPNE MOVSD_XMM 0x1b80e8(IP), X0
flops.go:21 0x401018 f20f100dd8801b00 REPNE MOVSD_XMM 0x1b80d8(IP), X1
flops.go:21 0x401020 f20f58c1 REPNE ADDSD X1, X0
flops.go:21 0x401024 f20f110544451e00 REPNE MOVSD_XMM X0, 0x1e4544(IP)
flops.go:22 0x40102c f20f1005cc801b00 REPNE MOVSD_XMM 0x1b80cc(IP), X0
flops.go:22 0x401034 f20f100dbc801b00 REPNE MOVSD_XMM 0x1b80bc(IP), X1
flops.go:22 0x40103c f20f5cc1 REPNE SUBSD X1, X0
flops.go:22 0x401040 f20f110528451e00 REPNE MOVSD_XMM X0, 0x1e4528(IP)
flops.go:23 0x401048 f20f1005b0801b00 REPNE MOVSD_XMM 0x1b80b0(IP), X0
flops.go:23 0x401050 f20f100da0801b00 REPNE MOVSD_XMM 0x1b80a0(IP), X1
flops.go:23 0x401058 f20f59c1 REPNE MULSD X1, X0
flops.go:23 0x40105c f20f11050c451e00 REPNE MOVSD_XMM X0, 0x1e450c(IP)
flops.go:24 0x401064 f20f100594801b00 REPNE MOVSD_XMM 0x1b8094(IP), X0
flops.go:24 0x40106c f20f100d84801b00 REPNE MOVSD_XMM 0x1b8084(IP), X1
flops.go:24 0x401074 f20f5ec1 REPNE DIVSD X1, X0
flops.go:24 0x401078 f20f1105f0441e00 REPNE MOVSD_XMM X0, 0x1e44f0(IP)
flops.go:25 0x401080 f20f100578801b00 REPNE MOVSD_XMM 0x1b8078(IP), X0
flops.go:25 0x401088 f20f100d68801b00 REPNE MOVSD_XMM 0x1b8068(IP), X1
flops.go:25 0x401090 f20f58c1 REPNE ADDSD X1, X0
flops.go:25 0x401094 f20f1105d4441e00 REPNE MOVSD_XMM X0, 0x1e44d4(IP)
flops.go:26 0x40109c f20f10055c801b00 REPNE MOVSD_XMM 0x1b805c(IP), X0
flops.go:26 0x4010a4 f20f100d4c801b00 REPNE MOVSD_XMM 0x1b804c(IP), X1
flops.go:26 0x4010ac f20f5cc1 REPNE SUBSD X1, X0
flops.go:26 0x4010b0 f20f1105b8441e00 REPNE MOVSD_XMM X0, 0x1e44b8(IP)
flops.go:27 0x4010b8 f20f100540801b00 REPNE MOVSD_XMM 0x1b8040(IP), X0
flops.go:27 0x4010c0 f20f100d30801b00 REPNE MOVSD_XMM 0x1b8030(IP), X1
flops.go:27 0x4010c8 f20f59c1 REPNE MULSD X1, X0
flops.go:27 0x4010cc f20f11059c441e00 REPNE MOVSD_XMM X0, 0x1e449c(IP)
flops.go:28 0x4010d4 f20f100524801b00 REPNE MOVSD_XMM 0x1b8024(IP), X0
flops.go:28 0x4010dc f20f100d14801b00 REPNE MOVSD_XMM 0x1b8014(IP), X1
flops.go:28 0x4010e4 f20f5ec1 REPNE DIVSD X1, X0
flops.go:28 0x4010e8 f20f110580441e00 REPNE MOVSD_XMM X0, 0x1e4480(IP)
flops.go:29 0x4010f0 f20f100508801b00 REPNE MOVSD_XMM 0x1b8008(IP), X0
flops.go:29 0x4010f8 f20f100df87f1b00 REPNE MOVSD_XMM 0x1b7ff8(IP), X1
flops.go:29 0x401100 f20f58c1 REPNE ADDSD X1, X0
flops.go:29 0x401104 f20f110564441e00 REPNE MOVSD_XMM X0, 0x1e4464(IP)
flops.go:30 0x40110c f20f1005ec7f1b00 REPNE MOVSD_XMM 0x1b7fec(IP), X0
flops.go:30 0x401114 f20f100ddc7f1b00 REPNE MOVSD_XMM 0x1b7fdc(IP), X1
flops.go:30 0x40111c f20f5cc1 REPNE SUBSD X1, X0
flops.go:30 0x401120 f20f110548441e00 REPNE MOVSD_XMM X0, 0x1e4448(IP)
flops.go:31 0x401128 f20f1005d07f1b00 REPNE MOVSD_XMM 0x1b7fd0(IP), X0
flops.go:31 0x401130 f20f100dc07f1b00 REPNE MOVSD_XMM 0x1b7fc0(IP), X1
flops.go:31 0x401138 f20f59c1 REPNE MULSD X1, X0
flops.go:31 0x40113c f20f11052c441e00 REPNE MOVSD_XMM X0, 0x1e442c(IP)
flops.go:32 0x401144 f20f1005b47f1b00 REPNE MOVSD_XMM 0x1b7fb4(IP), X0
flops.go:32 0x40114c f20f100da47f1b00 REPNE MOVSD_XMM 0x1b7fa4(IP), X1
flops.go:32 0x401154 f20f5ec1 REPNE DIVSD X1, X0
flops.go:32 0x401158 f20f110510441e00 REPNE MOVSD_XMM X0, 0x1e4410(IP)
flops.go:33 0x401160 f20f1005987f1b00 REPNE MOVSD_XMM 0x1b7f98(IP), X0
flops.go:33 0x401168 f20f100d887f1b00 REPNE MOVSD_XMM 0x1b7f88(IP), X1
flops.go:33 0x401170 f20f58c1 REPNE ADDSD X1, X0
flops.go:33 0x401174 f20f1105f4431e00 REPNE MOVSD_XMM X0, 0x1e43f4(IP)
flops.go:34 0x40117c f20f10057c7f1b00 REPNE MOVSD_XMM 0x1b7f7c(IP), X0
flops.go:34 0x401184 f20f100d6c7f1b00 REPNE MOVSD_XMM 0x1b7f6c(IP), X1
flops.go:34 0x40118c f20f5cc1 REPNE SUBSD X1, X0
flops.go:34 0x401190 f20f1105d8431e00 REPNE MOVSD_XMM X0, 0x1e43d8(IP)
flops.go:35 0x401198 f20f1005607f1b00 REPNE MOVSD_XMM 0x1b7f60(IP), X0
flops.go:35 0x4011a0 f20f100d507f1b00 REPNE MOVSD_XMM 0x1b7f50(IP), X1
flops.go:35 0x4011a8 f20f59c1 REPNE MULSD X1, X0
flops.go:35 0x4011ac f20f1105bc431e00 REPNE MOVSD_XMM X0, 0x1e43bc(IP)
flops.go:36 0x4011b4 f20f1005447f1b00 REPNE MOVSD_XMM 0x1b7f44(IP), X0
flops.go:36 0x4011bc f20f100d347f1b00 REPNE MOVSD_XMM 0x1b7f34(IP), X1
flops.go:36 0x4011c4 f20f5ec1 REPNE DIVSD X1, X0
flops.go:36 0x4011c8 f20f1105a0431e00 REPNE MOVSD_XMM X0, 0x1e43a0(IP)
flops.go:37 0x4011d0 f20f1005287f1b00 REPNE MOVSD_XMM 0x1b7f28(IP), X0
flops.go:37 0x4011d8 f20f100d187f1b00 REPNE MOVSD_XMM 0x1b7f18(IP), X1
flops.go:37 0x4011e0 f20f58c1 REPNE ADDSD X1, X0
flops.go:37 0x4011e4 f20f110584431e00 REPNE MOVSD_XMM X0, 0x1e4384(IP)
flops.go:38 0x4011ec f20f10050c7f1b00 REPNE MOVSD_XMM 0x1b7f0c(IP), X0
flops.go:38 0x4011f4 f20f100dfc7e1b00 REPNE MOVSD_XMM 0x1b7efc(IP), X1
flops.go:38 0x4011fc f20f5cc1 REPNE SUBSD X1, X0
flops.go:38 0x401200 f20f110568431e00 REPNE MOVSD_XMM X0, 0x1e4368(IP)
flops.go:39 0x401208 f20f1005f07e1b00 REPNE MOVSD_XMM 0x1b7ef0(IP), X0
flops.go:39 0x401210 f20f100de07e1b00 REPNE MOVSD_XMM 0x1b7ee0(IP), X1
flops.go:39 0x401218 f20f59c1 REPNE MULSD X1, X0
flops.go:39 0x40121c f20f11054c431e00 REPNE MOVSD_XMM X0, 0x1e434c(IP)
flops.go:40 0x401224 f20f1005d47e1b00 REPNE MOVSD_XMM 0x1b7ed4(IP), X0
flops.go:40 0x40122c f20f100dc47e1b00 REPNE MOVSD_XMM 0x1b7ec4(IP), X1
flops.go:40 0x401234 f20f5ec1 REPNE DIVSD X1, X0
flops.go:40 0x401238 f20f110530431e00 REPNE MOVSD_XMM X0, 0x1e4330(IP)
flops.go:20 0x401240 48ffc0 INCQ AX
flops.go:20 0x401243 4839c8 CMPQ CX, AX
flops.go:20 0x401246 0f8cc4fdffff JL 0x401010
flops.go:42 0x40124c c3 RET

关于go - 在 Go 中测量 FLOPS,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/35162611/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com