gpt4 book ai didi

java - Java中 "if"和 "if else"的不同性能

转载 作者:行者123 更新时间:2023-12-03 11:19:21 24 4
gpt4 key购买 nike

我注意到if else/三元(condition ? a : b)分配比if only语句中的条件分配要快。我在不同的JDK上执行了JMH基准测试,但我将专注于JDK 12。
(操作数/秒,越高越好)
JMH benchmark
源代码:

@State(Scope.Benchmark)
public class FindMaxBenchmark {
public static int SIZE = 1_000_000;

@Benchmark
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public static void findMax_if(Blackhole bh, Mock mock) {
int result = Integer.MIN_VALUE;
int[] data = mock.tab;

for (int i = 0; i < data.length; i++) {
if (data[i] > result) {
result = data[i];
}
}

bh.consume(result);
}

@Benchmark
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public static void findMax_if_else(Blackhole bh, Mock mock) {
int result = Integer.MIN_VALUE;
int[] data = mock.tab;

for (int i = 0; i < data.length; i++) {
if (data[i] > result) {
result = data[i];
} else {
result = result;
}
}

bh.consume(result);
}

@Benchmark
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public static void findMax_ternary(Blackhole bh, Mock mock) {
int result = Integer.MIN_VALUE;
int[] data = mock.tab;

for (int i = 0; i < data.length; i++) {
result = data[i] > result ? data[i] : result;
}

bh.consume(result);
}

@Benchmark
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public static void findMax_intrinsicMax(Blackhole bh, Mock mock) {
int result = Integer.MIN_VALUE;
int[] data = mock.tab;

for (int i = 0; i < data.length; i++) {
result = Math.max(data[i], result);
}

bh.consume(result);
}

@State(Scope.Thread)
public static class Mock {
private int[] tab = new int[SIZE];

public int[] getTab() {
return tab;
}

@Setup(Level.Iteration)
public void setup() {
Random r = new Random();
this.tab = r.ints(SIZE).toArray();
}
}
}
findMax_if_else perfasm输出(三进制几乎相同):
c2, level 4, codes.dbg.FindMaxBenchmark::findMax_if_else, version 493 (165 bytes)

0x00007fc7a8671a6b: cmp r8d,ebp
╭ 0x00007fc7a8671a6e: jae 0x00007fc7a8671b3d
│ 0x00007fc7a8671a74: mov edx,DWORD PTR [r9+0x10] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
│ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
│ 0x00007fc7a8671a78: cmp edx,0x80000000
│╭ 0x00007fc7a8671a7e: jg 0x00007fc7a8671a85 ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
││ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@23 (line 34)
││ 0x00007fc7a8671a80: mov edx,0x80000000 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
││ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
│↘ 0x00007fc7a8671a85: mov ebx,ebp
0.02% │ 0x00007fc7a8671a87: add ebx,0xfffffffd
│ 0x00007fc7a8671a8a: cmp r8d,ebx
│ 0x00007fc7a8671a8d: cmovl ebx,r11d
│ 0x00007fc7a8671a91: mov r8d,0x1
0.00% │ 0x00007fc7a8671a97: cmp ebx,0x1
│ ╭ 0x00007fc7a8671a9a: jle 0x00007fc7a8671b00
│ │ 0x00007fc7a8671a9c: mov rdi,r9 ;*goto {reexecute=0 rethrow=0 return_oop=0}
│ │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@39 (line 33)
│ │╭ 0x00007fc7a8671a9f: jmp 0x00007fc7a8671ab9
0.01% │ ││ ↗ 0x00007fc7a8671aa1: mov edx,ecx
│ ││ │ 0x00007fc7a8671aa3: nop DWORD PTR [rax+0x0]
│ ││ │ 0x00007fc7a8671aaa: nop WORD PTR [rax+rax*1+0x0]
8.06% │ ││ ↗│ 0x00007fc7a8671ab0: add r8d,0x4 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
│ ││ ││ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
11.38% │ ││ ││ 0x00007fc7a8671ab4: cmp r8d,ebx
13.63% │ ││╭ ││ 0x00007fc7a8671ab7: jge 0x00007fc7a8671af1 ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
│ │││ ││ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@18 (line 34)
3.02% │ │↘│ ││ ↗ 0x00007fc7a8671ab9: mov r11d,DWORD PTR [r9+r8*4+0x10] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
│ │ │ ││ │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
8.53% │ │ │ ││ │ 0x00007fc7a8671abe: cmp r11d,edx
4.54% │ │ │╭ ││ │ 0x00007fc7a8671ac1: jg 0x00007fc7a8671ae2 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
│ │ ││ ││ │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
4.96% │ │ ││ ││↗ │ 0x00007fc7a8671ac3: mov r11d,DWORD PTR [r9+r8*4+0x14] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
│ │ ││ │││ │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
3.73% │ │ ││ │││ │ 0x00007fc7a8671ac8: cmp r11d,edx
9.19% │ │ ││╭ │││ │ 0x00007fc7a8671acb: jg 0x00007fc7a8671ae7 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
│ │ │││ │││ │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
3.70% │ │ │││ │││↗ │ 0x00007fc7a8671acd: mov r11d,DWORD PTR [r9+r8*4+0x18] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
│ │ │││ ││││ │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
4.96% │ │ │││ ││││ │ 0x00007fc7a8671ad2: cmp r11d,edx
4.45% │ │ │││╭││││ │ 0x00007fc7a8671ad5: jg 0x00007fc7a8671aec ;*iinc {reexecute=0 rethrow=0 return_oop=0}
│ │ ││││││││ │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
8.55% │ │ ││││││││↗│ 0x00007fc7a8671ad7: mov ecx,DWORD PTR [r9+r8*4+0x1c] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
│ │ ││││││││││ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
6.11% │ │ ││││││││││ 0x00007fc7a8671adc: cmp ecx,edx
2.48% │ │ ││││╰│││││ 0x00007fc7a8671ade: jle 0x00007fc7a8671ab0 ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
│ │ ││││ │││││ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@23 (line 34)
│ │ ││││ ╰││││ 0x00007fc7a8671ae0: jmp 0x00007fc7a8671aa1
│ │ │↘││ ││││ 0x00007fc7a8671ae2: mov edx,r11d
0.00% │ │ │ ││ ╰│││ 0x00007fc7a8671ae5: jmp 0x00007fc7a8671ac3
0.00% │ │ │ ↘│ │││ 0x00007fc7a8671ae7: mov edx,r11d
0.00% │ │ │ │ ╰││ 0x00007fc7a8671aea: jmp 0x00007fc7a8671acd
0.00% │ │ │ ↘ ││ 0x00007fc7a8671aec: mov edx,r11d
0.00% │ │ │ ╰│ 0x00007fc7a8671aef: jmp 0x00007fc7a8671ad7
│ │ ↘ │ 0x00007fc7a8671af1: mov r11,QWORD PTR [r15+0x108] ; ImmutableOopMap{r10=Oop r9=NarrowOop rdi=Oop }
│ │ │ ;*goto {reexecute=1 rethrow=0 return_oop=0}
│ │ │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@39 (line 33)
│ │ │ 0x00007fc7a8671af8: test DWORD PTR [r11],eax ;*goto {reexecute=0 rethrow=0 return_oop=0}
│ │ │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@39 (line 33)
│ │ │ ; {poll}
│ │ │ 0x00007fc7a8671afb: cmp r8d,ebx
0.00% │ │ ╰ 0x00007fc7a8671afe: jl 0x00007fc7a8671ab9
│ ↘ 0x00007fc7a8671b00: cmp r8d,ebp
0.00% │ ╭ 0x00007fc7a8671b03: jge 0x00007fc7a8671b1a
│ │ 0x00007fc7a8671b05: data16 xchg ax,ax ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
│ │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@18 (line 34)
│ │ ↗ 0x00007fc7a8671b08: mov r11d,DWORD PTR [r9+r8*4+0x10] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
│ │ │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
0.01% │ │ │ 0x00007fc7a8671b0d: cmp r11d,edx
│ │╭│ 0x00007fc7a8671b10: jg 0x00007fc7a8671b38
│ │││↗ 0x00007fc7a8671b12: inc r8d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
│ ││││ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
│ ││││ 0x00007fc7a8671b15: cmp r8d,ebp
│ ││╰│ 0x00007fc7a8671b18: jl 0x00007fc7a8671b08 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
│ ││ │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@15 (line 33)
│ ↘│ │ 0x00007fc7a8671b1a: test r10,r10
0.00% │ │ │ 0x00007fc7a8671b1d: je 0x00007fc7a8671b52
│ │ │ 0x00007fc7a8671b1f: mov rsi,r10
│ │ │ 0x00007fc7a8671b22: nop
│ │ │ 0x00007fc7a8671b23: call 0x00007fc7a8671ba0 ; ImmutableOopMap{}
│ │ │ ;*invokevirtual consume {reexecute=0 rethrow=0 return_oop=0}
│ │ │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@44 (line 41)
│ │ │ ; {optimized virtual_call}
│ │ │ 0x00007fc7a8671b28: add rsp,0x20
0.01% │ │ │ 0x00007fc7a8671b2c: pop rbp
│ │ │ 0x00007fc7a8671b2d: mov r10,QWORD PTR [r15+0x108]
│ │ │ 0x00007fc7a8671b34: test DWORD PTR [r10],eax ; {poll_return}
│ │ │ 0x00007fc7a8671b37: ret
│ ↘ │ 0x00007fc7a8671b38: mov edx,r11d
│ ╰ 0x00007fc7a8671b3b: jmp 0x00007fc7a8671b12
↘ 0x00007fc7a8671b3d: mov esi,0xffffff7e
0x00007fc7a8671b42: mov QWORD PTR [rsp],r10
0x00007fc7a8671b46: mov DWORD PTR [rsp+0x8],r9d
0x00007fc7a8671b4b: call 0x00007fc7a0ba3d00 ; ImmutableOopMap{[0]=Oop [8]=NarrowOop }
;*if_icmpge {reexecute=1 rethrow=0 return_oop=0}
findMax_if性能输出:
c2, level 4, codes.dbg.FindMaxBenchmark::findMax_if, version 480 (165 bytes)

0x00007f34cc66e7eb: cmp r8d,ebp
╭ 0x00007f34cc66e7ee: jae 0x00007f34cc66e8c4
│ 0x00007f34cc66e7f4: mov edx,DWORD PTR [r9+0x10] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
│ ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
│ 0x00007f34cc66e7f8: cmp edx,0x80000000
│╭ 0x00007f34cc66e7fe: jg 0x00007f34cc66e805 ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
││ ; - codes.dbg.FindMaxBenchmark::findMax_if@23 (line 19)
││ 0x00007f34cc66e800: mov edx,0x80000000 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
││ ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
│↘ 0x00007f34cc66e805: mov ebx,ebp
0.01% │ 0x00007f34cc66e807: add ebx,0xfffffffd
│ 0x00007f34cc66e80a: cmp r8d,ebx
│ 0x00007f34cc66e80d: cmovl ebx,r11d
│ 0x00007f34cc66e811: mov r8d,0x1
│ 0x00007f34cc66e817: cmp ebx,0x1
│ ╭ 0x00007f34cc66e81a: jle 0x00007f34cc66e880
│ │ 0x00007f34cc66e81c: mov rdi,r9 ;*goto {reexecute=0 rethrow=0 return_oop=0}
│ │ ; - codes.dbg.FindMaxBenchmark::findMax_if@34 (line 18)
│ │╭ 0x00007f34cc66e81f: jmp 0x00007f34cc66e839
│ ││ ↗ 0x00007f34cc66e821: mov edx,ecx
0.00% │ ││ │ 0x00007f34cc66e823: nop DWORD PTR [rax+0x0]
│ ││ │ 0x00007f34cc66e82a: nop WORD PTR [rax+rax*1+0x0]
0.89% │ ││ │↗ 0x00007f34cc66e830: add r8d,0x4 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
│ ││ ││ ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
12.36% │ ││ ││ 0x00007f34cc66e834: cmp r8d,ebx
0.11% │ ││╭ ││ 0x00007f34cc66e837: jge 0x00007f34cc66e871 ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
│ │││ ││ ; - codes.dbg.FindMaxBenchmark::findMax_if@18 (line 19)
9.94% │ │↘│ ││ ↗ 0x00007f34cc66e839: mov r11d,DWORD PTR [r9+r8*4+0x10] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
│ │ │ ││ │ ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
0.11% │ │ │ ││ │ 0x00007f34cc66e83e: cmp r11d,edx
10.05% │ │ │╭ ││ │ 0x00007f34cc66e841: jg 0x00007f34cc66e862 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
│ │ ││ ││ │ ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
0.13% │ │ ││ ││↗ │ 0x00007f34cc66e843: mov r11d,DWORD PTR [r9+r8*4+0x14] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
│ │ ││ │││ │ ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
9.84% │ │ ││ │││ │ 0x00007f34cc66e848: cmp r11d,edx
0.11% │ │ ││╭ │││ │ 0x00007f34cc66e84b: jg 0x00007f34cc66e867 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
│ │ │││ │││ │ ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
10.02% │ │ │││ │││↗ │ 0x00007f34cc66e84d: mov r11d,DWORD PTR [r9+r8*4+0x18] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
│ │ │││ ││││ │ ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
0.33% │ │ │││ ││││ │ 0x00007f34cc66e852: cmp r11d,edx
23.63% │ │ │││╭││││ │ 0x00007f34cc66e855: jg 0x00007f34cc66e86c ;*iinc {reexecute=0 rethrow=0 return_oop=0}
│ │ ││││││││ │ ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
0.13% │ │ ││││││││↗│ 0x00007f34cc66e857: mov ecx,DWORD PTR [r9+r8*4+0x1c] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
│ │ ││││││││││ ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
9.89% │ │ ││││││││││ 0x00007f34cc66e85c: cmp ecx,edx
0.11% │ │ ││││╰│││││ 0x00007f34cc66e85e: jg 0x00007f34cc66e821 ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
│ │ ││││ │││││ ; - codes.dbg.FindMaxBenchmark::findMax_if@23 (line 19)
9.71% │ │ ││││ ╰││││ 0x00007f34cc66e860: jmp 0x00007f34cc66e830
│ │ │↘││ ││││ 0x00007f34cc66e862: mov edx,r11d
0.00% │ │ │ ││ ╰│││ 0x00007f34cc66e865: jmp 0x00007f34cc66e843
│ │ │ ↘│ │││ 0x00007f34cc66e867: mov edx,r11d
0.00% │ │ │ │ ╰││ 0x00007f34cc66e86a: jmp 0x00007f34cc66e84d
│ │ │ ↘ ││ 0x00007f34cc66e86c: mov edx,r11d
0.00% │ │ │ ╰│ 0x00007f34cc66e86f: jmp 0x00007f34cc66e857
│ │ ↘ │ 0x00007f34cc66e871: mov r11,QWORD PTR [r15+0x108] ; ImmutableOopMap{r10=Oop r9=NarrowOop rdi=Oop }
│ │ │ ;*goto {reexecute=1 rethrow=0 return_oop=0}
│ │ │ ; - codes.dbg.FindMaxBenchmark::findMax_if@34 (line 18)
0.00% │ │ │ 0x00007f34cc66e878: test DWORD PTR [r11],eax ;*goto {reexecute=0 rethrow=0 return_oop=0}
│ │ │ ; - codes.dbg.FindMaxBenchmark::findMax_if@34 (line 18)
│ │ │ ; {poll}
│ │ │ 0x00007f34cc66e87b: cmp r8d,ebx
│ │ ╰ 0x00007f34cc66e87e: jl 0x00007f34cc66e839
│ ↘ 0x00007f34cc66e880: cmp r8d,ebp
0.00% │ ╭ 0x00007f34cc66e883: jge 0x00007f34cc66e89a
│ │ 0x00007f34cc66e885: data16 xchg ax,ax ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
│ │ ; - codes.dbg.FindMaxBenchmark::findMax_if@18 (line 19)
0.00% │ │ ↗ 0x00007f34cc66e888: mov r11d,DWORD PTR [r9+r8*4+0x10] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
│ │ │ ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
0.01% │ │ │ 0x00007f34cc66e88d: cmp r11d,edx
│ │╭│ 0x00007f34cc66e890: jg 0x00007f34cc66e8b8
│ │││↗ 0x00007f34cc66e892: inc r8d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
│ ││││ ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
│ ││││ 0x00007f34cc66e895: cmp r8d,ebp
│ ││╰│ 0x00007f34cc66e898: jl 0x00007f34cc66e888 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
│ ││ │ ; - codes.dbg.FindMaxBenchmark::findMax_if@15 (line 18)
│ ↘│ │↗ 0x00007f34cc66e89a: test r10,r10
0.00% │ │ ││ 0x00007f34cc66e89d: je 0x00007f34cc66e8da
│ │ ││ 0x00007f34cc66e89f: mov rsi,r10
│ │ ││ 0x00007f34cc66e8a2: nop
│ │ ││ 0x00007f34cc66e8a3: call 0x00007f34cc66e920 ; ImmutableOopMap{}
│ │ ││ ;*invokevirtual consume {reexecute=0 rethrow=0 return_oop=0}
│ │ ││ ; - codes.dbg.FindMaxBenchmark::findMax_if@39 (line 24)
│ │ ││ ; {optimized virtual_call}
0.00% │ │ ││ 0x00007f34cc66e8a8: add rsp,0x20
0.01% │ │ ││ 0x00007f34cc66e8ac: pop rbp
│ │ ││ 0x00007f34cc66e8ad: mov r10,QWORD PTR [r15+0x108]
│ │ ││ 0x00007f34cc66e8b4: test DWORD PTR [r10],eax ; {poll_return}
│ │ ││ 0x00007f34cc66e8b7: ret
│ ↘ ││ 0x00007f34cc66e8b8: mov edx,r11d
│ ╰│ 0x00007f34cc66e8bb: jmp 0x00007f34cc66e892
│ │ 0x00007f34cc66e8bd: mov edx,0x80000000
│ ╰ 0x00007f34cc66e8c2: jmp 0x00007f34cc66e89a
↘ 0x00007f34cc66e8c4: mov esi,0xffffff7e
0x00007f34cc66e8c9: mov QWORD PTR [rsp],r10
0x00007f34cc66e8cd: mov DWORD PTR [rsp+0x8],r9d
....................................................................................................
观察值:
  • findMax_iffindMax_if_else之间只有一个显着差异:
  • 0x00007f34cc66e85e: jg 0x00007f34cc66e8210x00007fc7a8671ade: jle 0x00007fc7a8671ab0
  • findMax_intrinsicMax破坏了固有的Math.max的性能最差,这与我的直觉相反。

  • 问题:
  • 是否添加包含不改变任何内容的代码的else语句(例如x = x;)是否正常?特别是在一个线程上执行的代码中。
  • thourghput差异的真正来源在哪里?我看到jg(如果大于则跳转)不是jle(如果小于或等于则跳转)。实际上,第一条件是倒置的第二条件。
  • 如果简单的Math.max语句具有较高的吞吐量,那么使用if else有什么意义?

  • Source code on GitHub run_tests.sh运行基准测试并生成图。

    最佳答案

    首先,为了最大程度地减少无关的ASM代码的数量并简化分析,让我们添加以下JVM选项:

  • -XX:LoopUnrollLimit=0-关闭循环展开;
  • -XX:-UseCountedLoopSafepoints-从循环中消除安全点轮询。

  • 现在,支持 if_else的性能差异将更大,而结果汇编将更加简单。这是两个基准的循环体。
    findMax_if
             ╭     0x0000029707af78f5: jmp     29707af7908h
    │ ↗ 0x0000029707af78f7: mov r8d,ecx
    │ │ 0x0000029707af78fa: nop word ptr [rax+rax+0h]
    0,66% │ │↗ 0x0000029707af7900: inc r9d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
    │ ││ ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
    1,02% │ ││ 0x0000029707af7903: cmp r9d,r10d
    │╭││ 0x0000029707af7906: jnl 29707af7914h ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
    ││││ ; - codes.dbg.FindMaxBenchmark::findMax_if@18 (line 19)
    2,06% ↘│││ 0x0000029707af7908: mov ecx,dword ptr [r11+r9*4+10h]
    │││ ;*iaload {reexecute=0 rethrow=0 return_oop=0}
    │││ ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
    50,86% │││ 0x0000029707af790d: cmp ecx,r8d
    0,02% │╰│ 0x0000029707af7910: jnle 29707af78f7h ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
    │ │ ; - codes.dbg.FindMaxBenchmark::findMax_if@23 (line 19)
    41,01% │ ╰ 0x0000029707af7912: jmp 29707af7900h ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
    │ ; - codes.dbg.FindMaxBenchmark::findMax_if@15 (line 18)
    ↘ 0x0000029707af7914: test rbx,rbx
    findMax_if_else
             ╭     0x00000137d24d4b75: jmp     137d24d4b88h
    │ ↗ 0x00000137d24d4b77: mov r8d,ecx
    │ │ 0x00000137d24d4b7a: nop word ptr [rax+rax+0h]
    72,63% │ ↗│ 0x00000137d24d4b80: inc r9d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
    │ ││ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
    0,05% │ ││ 0x00000137d24d4b83: cmp r9d,r10d
    0,01% │╭││ 0x00000137d24d4b86: jnl 137d24d4b94h ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
    ││││ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@18 (line 34)
    6,47% ↘│││ 0x00000137d24d4b88: mov ecx,dword ptr [r11+r9*4+10h]
    │││ ;*iaload {reexecute=0 rethrow=0 return_oop=0}
    │││ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
    15,93% │││ 0x00000137d24d4b8d: cmp ecx,r8d
    0,18% │╰│ 0x00000137d24d4b90: jle 137d24d4b80h ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
    │ │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@23 (line 34)
    0,01% │ ╰ 0x00000137d24d4b92: jmp 137d24d4b77h ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
    │ ; - codes.dbg.FindMaxBenchmark::findMax_if_else@15 (line 33)
    ↘ 0x00000137d24d4b94: test rbx,rbx
    这与您的发现一致:两次编译之间的唯一区别是反向跳转条件: jnlejle。为什么 jnle变体那么慢?
    如果仔细看一下基准代码,就会发现当前最大变化的点很少发生。平均而言,整个循环中 data[i] > result仅正确14次。这意味着 jnle分支仅占用0.001%的时间,其余99.999%的时间通过下一条 jmp指令执行。
    相反,第二个变体中的 jle指令占用了99.999%的时间,执行几乎从未达到以下 jmp。因此,第一个循环每次迭代退出7条指令,而第二个循环-仅6条指令。
    JMH具有内置的 perfnorm探查器(在Linux上可用),该探查器通过CPU性能计数器stats补充基准测试结果。让我们使用 -prof perfnorm运行它。
    Benchmark                                                Mode  Cnt        Score    Error  Units
    FindMaxBenchmark.findMax_if thrpt 10 1447.576 ± 8.854 ops/s
    FindMaxBenchmark.findMax_if:CPI thrpt 0.335 #/op
    FindMaxBenchmark.findMax_if:L1-dcache-load-misses thrpt 63971.361 #/op
    FindMaxBenchmark.findMax_if:L1-dcache-loads thrpt 1014974.522 #/op
    FindMaxBenchmark.findMax_if:L1-dcache-stores thrpt 6105.121 #/op
    FindMaxBenchmark.findMax_if:L1-icache-load-misses thrpt 1641.074 #/op
    FindMaxBenchmark.findMax_if:branch-misses thrpt 146.305 #/op
    FindMaxBenchmark.findMax_if:branches thrpt 3006620.048 #/op
    FindMaxBenchmark.findMax_if:cycles thrpt 2358093.526 #/op
    FindMaxBenchmark.findMax_if:dTLB-load-misses thrpt 1085.740 #/op
    FindMaxBenchmark.findMax_if:dTLB-loads thrpt 1012739.362 #/op
    FindMaxBenchmark.findMax_if:dTLB-store-misses thrpt 21.985 #/op
    FindMaxBenchmark.findMax_if:dTLB-stores thrpt 6146.243 #/op
    FindMaxBenchmark.findMax_if:iTLB-load-misses thrpt 139.741 #/op
    FindMaxBenchmark.findMax_if:iTLB-loads thrpt 42.031 #/op
    FindMaxBenchmark.findMax_if:instructions thrpt 7039394.622 #/op
    FindMaxBenchmark.findMax_if_else thrpt 10 2472.400 ± 36.958 ops/s
    FindMaxBenchmark.findMax_if_else:CPI thrpt 0.229 #/op
    FindMaxBenchmark.findMax_if_else:L1-dcache-load-misses thrpt 63353.481 #/op
    FindMaxBenchmark.findMax_if_else:L1-dcache-loads thrpt 1007856.753 #/op
    FindMaxBenchmark.findMax_if_else:L1-dcache-stores thrpt 3696.805 #/op
    FindMaxBenchmark.findMax_if_else:L1-icache-load-misses thrpt 1182.253 #/op
    FindMaxBenchmark.findMax_if_else:branch-misses thrpt 72.334 #/op
    FindMaxBenchmark.findMax_if_else:branches thrpt 2000460.845 #/op
    FindMaxBenchmark.findMax_if_else:cycles thrpt 1380927.546 #/op
    FindMaxBenchmark.findMax_if_else:dTLB-load-misses thrpt 845.629 #/op
    FindMaxBenchmark.findMax_if_else:dTLB-loads thrpt 1006135.685 #/op
    FindMaxBenchmark.findMax_if_else:dTLB-store-misses thrpt 13.336 #/op
    FindMaxBenchmark.findMax_if_else:dTLB-stores thrpt 3545.950 #/op
    FindMaxBenchmark.findMax_if_else:iTLB-load-misses thrpt 80.233 #/op
    FindMaxBenchmark.findMax_if_else:iTLB-loads thrpt 19.009 #/op
    FindMaxBenchmark.findMax_if_else:instructions thrpt 6018937.376 #/op
    性能计数器确认 findMax_if用3M分支执行7M指令,而 findMax_if_else用2M分支执行6M指令。我想现在很清楚差异的来源,那么其他问题呢?

    Is normal to add else statement containing code which doesn't changeanything


    我不这么认为。至少因为这看起来违反直觉,并且使代码更难于阅读和理解。幸运的是,冗余代码很好地反转了分支条件。将您的随机数组替换为排序后的数组,以便 data[i] > result大部分为true,然后 findMax_if将成为最快的选择。

    What is the point of using Math.max if simple if else statement hashigher throughput?


    同样,这并不总是正确的。这在很大程度上取决于数据的性质。当分支易于预测时, if语句的性能会更好。但是,一旦分支预测变量经常开始出现故障,性能就会急剧下降。 JVMt内在方法 Math.max转换为无分支 cmov指令,无论数据分布如何,它都具有性能稳定的优点。
    这是一个示例数据集,其中 Math.max大大胜过所有其他选项:
    public void setup() {
    Random r = new Random();
    this.tab = r.ints(SIZE).sorted().toArray();
    for (int i = 0; i < tab.length; i += ThreadLocalRandom.current().nextInt(3)) {
    tab[i] = 0;
    }
    }

    关于java - Java中 "if"和 "if else"的不同性能,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/63783852/

    24 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com