gpt4 book ai didi

c++ - -O1 改变浮点运算结果

转载 作者:行者123 更新时间:2023-12-04 01:05:02 28 4
gpt4 key购买 nike

以下 C++ 程序在分别使用 -O0 和 -O1 编译时,给出了数值上不同的结果。

#include <iostream>
#include <array>
#include <cmath>
#include <iomanip>

// Reproducer: applies a 2-D rotation by `theta` to the vector (v0, 0) and
// prints both resulting components in hexadecimal floating-point notation,
// so that two builds of this program can be compared bit for bit.
// Always returns 0.
int main()
{
std::array<double, 2> v;
// Zero both components; only element 0 is given a non-zero value below.
v.fill(0);

// Hexadecimal floating-point literals pin the inputs to exact double values,
// so no decimal-to-binary rounding is involved in the inputs.
std::get<0>(v) = 0x1.5b4d3afe1f7d2p-1;
double theta = 0x1.1aef12f82caf9p+2;

// Cosine and sine of the rotation angle.
double c = std::cos(theta);
double s = std::sin(theta);
// Rotation formula. Element 1 of v is still 0 at this point, so the terms
// multiplied by std::get<1>(v) are zero and the results are determined by
// c * v0 and s * v0.
double vi = c * std::get<0>(v) - s * std::get<1>(v);
double vj = s * std::get<0>(v) + c * std::get<1>(v);
// Both results are computed into temporaries before being stored, so vj
// is calculated from the original (unrotated) components.
std::get<0>(v) = vi;
std::get<1>(v) = vj;

// Switch the stream to hexfloat so every bit of each double is visible.
std::cout << std::hexfloat;
// Print each component followed by a single space.
for (const auto& x : v)
std::cout << x << " ";
// Terminate the line and flush the stream.
std::cout << std::endl;

return 0;
}

我正在使用 gcc 9.2.0。我使用 std::hexfloat 来确保以全精度打印出 double 变量。

编译
g++ -std=c++17 -Wall -pedantic -O0 -o test test.cpp

给出以下结果(原文用粗体标出了差异,此处格式已丢失:两次输出仅在第一个数尾数的最后一位十六进制数字上不同,即 ...5371 与 ...5372)
-0x1.8f4e436eb5371p-3 -0x1.4ca54aa5d4e1ep-1

而使用

 g++ -std=c++17 -Wall -pedantic -O1 -o test test.cpp

输出是
-0x1.8f4e436eb5372p-3 -0x1.4ca54aa5d4e1ep-1

通过 -Q --help=optimizers 可以提取出 -O0 与 -O1 所启用的优化标志之间的差异,如下所示

$ g++ -O0 -Q --help=optimizers >optO0.txt
$ g++ -O1 -Q --help=optimizers >optO1.txt
$ diff optO0.txt optO1.txt|grep ^'>'
> -fbranch-count-reg [enabled]
> -fcombine-stack-adjustments [enabled]
> -fcompare-elim [enabled]
> -fcprop-registers [enabled]
> -fdefer-pop [enabled]
> -fforward-propagate [enabled]
> -fguess-branch-probability [enabled]
> -fif-conversion [enabled]
> -fif-conversion2 [enabled]
> -finline-functions-called-once [enabled]
> -fipa-profile [enabled]
> -fipa-pure-const [enabled]
> -fipa-reference [enabled]
> -fipa-reference-addressable [enabled]
> -fmove-loop-invariants [enabled]
> -fomit-frame-pointer [enabled]
> -freorder-blocks [enabled]
> -fshrink-wrap [enabled]
> -fsplit-wide-types [enabled]
> -fssa-phiopt [enabled]
> -ftree-bit-ccp [enabled]
> -ftree-builtin-call-dce [enabled]
> -ftree-ccp [enabled]
> -ftree-ch [enabled]
> -ftree-coalesce-vars [enabled]
> -ftree-copy-prop [enabled]
> -ftree-dce [enabled]
> -ftree-dominator-opts [enabled]
> -ftree-dse [enabled]
> -ftree-fre [enabled]
> -ftree-pta [enabled]
> -ftree-sink [enabled]
> -ftree-slsr [enabled]
> -ftree-sra [enabled]
> -ftree-ter [enabled]

根据这个列表以及 gcc 的手册页,-O1 并没有启用任何会改变浮点运算的优化标志(例如 -ffast-math),那么为什么输出会不同?

编辑:

使用另一个问题的回答(原文链接:answer / this question)中建议的标志 -ffloat-store 并不会改变结果。

C 库版本是
$ ldd --version
ldd (GNU libc) 2.30

该库是从 Arch Linux 的 glibc 2.30-1 包中安装的。架构是 x86_64。

这是 -O0 案例的汇编程序转储(使用 g++ -std=c++17 -Wall -pedantic -O0 -S -o test test.cpp 获得)
.file   "test.cpp"
.text
.section .text._ZStanSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStanSt13_Ios_FmtflagsS_,comdat
.weak _ZStanSt13_Ios_FmtflagsS_
.type _ZStanSt13_Ios_FmtflagsS_, @function
_ZStanSt13_Ios_FmtflagsS_:
.LFB1415:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl %edi, -4(%rbp)
movl %esi, -8(%rbp)
movl -4(%rbp), %eax
andl -8(%rbp), %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1415:
.size _ZStanSt13_Ios_FmtflagsS_, .-_ZStanSt13_Ios_FmtflagsS_
.section .text._ZStorSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStorSt13_Ios_FmtflagsS_,comdat
.weak _ZStorSt13_Ios_FmtflagsS_
.type _ZStorSt13_Ios_FmtflagsS_, @function
_ZStorSt13_Ios_FmtflagsS_:
.LFB1416:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl %edi, -4(%rbp)
movl %esi, -8(%rbp)
movl -4(%rbp), %eax
orl -8(%rbp), %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1416:
.size _ZStorSt13_Ios_FmtflagsS_, .-_ZStorSt13_Ios_FmtflagsS_
.section .text._ZStcoSt13_Ios_Fmtflags,"axG",@progbits,_ZStcoSt13_Ios_Fmtflags,comdat
.weak _ZStcoSt13_Ios_Fmtflags
.type _ZStcoSt13_Ios_Fmtflags, @function
_ZStcoSt13_Ios_Fmtflags:
.LFB1418:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl %edi, -4(%rbp)
movl -4(%rbp), %eax
notl %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1418:
.size _ZStcoSt13_Ios_Fmtflags, .-_ZStcoSt13_Ios_Fmtflags
.section .text._ZStoRRSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStoRRSt13_Ios_FmtflagsS_,comdat
.weak _ZStoRRSt13_Ios_FmtflagsS_
.type _ZStoRRSt13_Ios_FmtflagsS_, @function
_ZStoRRSt13_Ios_FmtflagsS_:
.LFB1419:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movl %esi, -12(%rbp)
movq -8(%rbp), %rax
movl (%rax), %eax
movl -12(%rbp), %edx
movl %edx, %esi
movl %eax, %edi
call _ZStorSt13_Ios_FmtflagsS_
movq -8(%rbp), %rdx
movl %eax, (%rdx)
movq -8(%rbp), %rax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1419:
.size _ZStoRRSt13_Ios_FmtflagsS_, .-_ZStoRRSt13_Ios_FmtflagsS_
.section .text._ZStaNRSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStaNRSt13_Ios_FmtflagsS_,comdat
.weak _ZStaNRSt13_Ios_FmtflagsS_
.type _ZStaNRSt13_Ios_FmtflagsS_, @function
_ZStaNRSt13_Ios_FmtflagsS_:
.LFB1420:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movl %esi, -12(%rbp)
movq -8(%rbp), %rax
movl (%rax), %eax
movl -12(%rbp), %edx
movl %edx, %esi
movl %eax, %edi
call _ZStanSt13_Ios_FmtflagsS_
movq -8(%rbp), %rdx
movl %eax, (%rdx)
movq -8(%rbp), %rax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1420:
.size _ZStaNRSt13_Ios_FmtflagsS_, .-_ZStaNRSt13_Ios_FmtflagsS_
.section .text._ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_,"axG",@progbits,_ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_,comdat
.align 2
.weak _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_
.type _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_, @function
_ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_:
.LFB1449:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
movq %rdi, -24(%rbp)
movl %esi, -28(%rbp)
movl %edx, -32(%rbp)
movq -24(%rbp), %rax
movl 24(%rax), %eax
movl %eax, -4(%rbp)
movl -32(%rbp), %eax
movl %eax, %edi
call _ZStcoSt13_Ios_Fmtflags
movl %eax, %edx
movq -24(%rbp), %rax
addq $24, %rax
movl %edx, %esi
movq %rax, %rdi
call _ZStaNRSt13_Ios_FmtflagsS_
movl -32(%rbp), %edx
movl -28(%rbp), %eax
movl %edx, %esi
movl %eax, %edi
call _ZStanSt13_Ios_FmtflagsS_
movl %eax, %edx
movq -24(%rbp), %rax
addq $24, %rax
movl %edx, %esi
movq %rax, %rdi
call _ZStoRRSt13_Ios_FmtflagsS_
movl -4(%rbp), %eax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1449:
.size _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_, .-_ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_
.section .text._ZSt8hexfloatRSt8ios_base,"axG",@progbits,_ZSt8hexfloatRSt8ios_base,comdat
.weak _ZSt8hexfloatRSt8ios_base
.type _ZSt8hexfloatRSt8ios_base, @function
_ZSt8hexfloatRSt8ios_base:
.LFB1481:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movl $256, %esi
movl $4, %edi
call _ZStorSt13_Ios_FmtflagsS_
movl %eax, %ecx
movq -8(%rbp), %rax
movl $260, %edx
movl %ecx, %esi
movq %rax, %rdi
call _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_
movq -8(%rbp), %rax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1481:
.size _ZSt8hexfloatRSt8ios_base, .-_ZSt8hexfloatRSt8ios_base
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.section .rodata
.LC3:
.string " "
.text
.globl main
.type main, @function
main:
.LFB2816:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
addq $-128, %rsp
movq %fs:40, %rax
movq %rax, -8(%rbp)
xorl %eax, %eax
pxor %xmm0, %xmm0
movsd %xmm0, -112(%rbp)
leaq -112(%rbp), %rdx
leaq -32(%rbp), %rax
movq %rdx, %rsi
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE4fillERKd
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd .LC1(%rip), %xmm0
movsd %xmm0, (%rax)
movsd .LC2(%rip), %xmm0
movsd %xmm0, -96(%rbp)
movq -96(%rbp), %rax
movq %rax, %xmm0
call cos@PLT
movq %xmm0, %rax
movq %rax, -88(%rbp)
movq -96(%rbp), %rax
movq %rax, %xmm0
call sin@PLT
movq %xmm0, %rax
movq %rax, -80(%rbp)
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd (%rax), %xmm0
mulsd -88(%rbp), %xmm0
movsd %xmm0, -120(%rbp)
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd (%rax), %xmm0
mulsd -80(%rbp), %xmm0
movsd -120(%rbp), %xmm1
subsd %xmm0, %xmm1
movapd %xmm1, %xmm0
movsd %xmm0, -72(%rbp)
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd (%rax), %xmm0
mulsd -80(%rbp), %xmm0
movsd %xmm0, -120(%rbp)
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd (%rax), %xmm0
mulsd -88(%rbp), %xmm0
addsd -120(%rbp), %xmm0
movsd %xmm0, -64(%rbp)
movsd -72(%rbp), %xmm3
movsd %xmm3, -120(%rbp)
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd -120(%rbp), %xmm3
movsd %xmm3, (%rax)
movsd -64(%rbp), %xmm4
movsd %xmm4, -120(%rbp)
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd -120(%rbp), %xmm4
movsd %xmm4, (%rax)
leaq _ZSt8hexfloatRSt8ios_base(%rip), %rsi
leaq _ZSt4cout(%rip), %rdi
call _ZNSolsEPFRSt8ios_baseS0_E@PLT
leaq -32(%rbp), %rax
movq %rax, -56(%rbp)
movq -56(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE5beginEv
movq %rax, -104(%rbp)
movq -56(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE3endEv
movq %rax, -48(%rbp)
.L17:
movq -104(%rbp), %rax
cmpq -48(%rbp), %rax
je .L16
movq -104(%rbp), %rax
movq %rax, -40(%rbp)
movq -40(%rbp), %rax
movq (%rax), %rax
movq %rax, %xmm0
leaq _ZSt4cout(%rip), %rdi
call _ZNSolsEd@PLT
leaq .LC3(%rip), %rsi
movq %rax, %rdi
call _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@PLT
addq $8, -104(%rbp)
jmp .L17
.L16:
movq _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_@GOTPCREL(%rip), %rax
movq %rax, %rsi
leaq _ZSt4cout(%rip), %rdi
call _ZNSolsEPFRSoS_E@PLT
movl $0, %eax
movq -8(%rbp), %rcx
xorq %fs:40, %rcx
je .L19
call __stack_chk_fail@PLT
.L19:
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE2816:
.size main, .-main
.section .text._ZNSt5arrayIdLm2EE4fillERKd,"axG",@progbits,_ZNSt5arrayIdLm2EE4fillERKd,comdat
.align 2
.weak _ZNSt5arrayIdLm2EE4fillERKd
.type _ZNSt5arrayIdLm2EE4fillERKd, @function
_ZNSt5arrayIdLm2EE4fillERKd:
.LFB3128:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
pushq %rbx
subq $24, %rsp
.cfi_offset 3, -24
movq %rdi, -24(%rbp)
movq %rsi, -32(%rbp)
movq -24(%rbp), %rax
movq %rax, %rdi
call _ZNKSt5arrayIdLm2EE4sizeEv
movq %rax, %rbx
movq -24(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE5beginEv
movq %rax, %rcx
movq -32(%rbp), %rax
movq %rax, %rdx
movq %rbx, %rsi
movq %rcx, %rdi
call _ZSt6fill_nIPdmdET_S1_T0_RKT1_
nop
addq $24, %rsp
popq %rbx
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3128:
.size _ZNSt5arrayIdLm2EE4fillERKd, .-_ZNSt5arrayIdLm2EE4fillERKd
.section .text._ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE,"axG",@progbits,_ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE,comdat
.weak _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
.type _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE, @function
_ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE:
.LFB3129:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movl $0, %esi
movq %rax, %rdi
call _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3129:
.size _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE, .-_ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
.section .text._ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE,"axG",@progbits,_ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE,comdat
.weak _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
.type _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE, @function
_ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE:
.LFB3130:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movl $1, %esi
movq %rax, %rdi
call _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3130:
.size _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE, .-_ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
.section .text._ZNSt5arrayIdLm2EE5beginEv,"axG",@progbits,_ZNSt5arrayIdLm2EE5beginEv,comdat
.align 2
.weak _ZNSt5arrayIdLm2EE5beginEv
.type _ZNSt5arrayIdLm2EE5beginEv, @function
_ZNSt5arrayIdLm2EE5beginEv:
.LFB3132:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE4dataEv
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3132:
.size _ZNSt5arrayIdLm2EE5beginEv, .-_ZNSt5arrayIdLm2EE5beginEv
.section .text._ZNSt5arrayIdLm2EE3endEv,"axG",@progbits,_ZNSt5arrayIdLm2EE3endEv,comdat
.align 2
.weak _ZNSt5arrayIdLm2EE3endEv
.type _ZNSt5arrayIdLm2EE3endEv, @function
_ZNSt5arrayIdLm2EE3endEv:
.LFB3133:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE4dataEv
addq $16, %rax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3133:
.size _ZNSt5arrayIdLm2EE3endEv, .-_ZNSt5arrayIdLm2EE3endEv
.section .text._ZNKSt5arrayIdLm2EE4sizeEv,"axG",@progbits,_ZNKSt5arrayIdLm2EE4sizeEv,comdat
.align 2
.weak _ZNKSt5arrayIdLm2EE4sizeEv
.type _ZNKSt5arrayIdLm2EE4sizeEv, @function
_ZNKSt5arrayIdLm2EE4sizeEv:
.LFB3247:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movl $2, %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3247:
.size _ZNKSt5arrayIdLm2EE4sizeEv, .-_ZNKSt5arrayIdLm2EE4sizeEv
.section .text._ZSt6fill_nIPdmdET_S1_T0_RKT1_,"axG",@progbits,_ZSt6fill_nIPdmdET_S1_T0_RKT1_,comdat
.weak _ZSt6fill_nIPdmdET_S1_T0_RKT1_
.type _ZSt6fill_nIPdmdET_S1_T0_RKT1_, @function
_ZSt6fill_nIPdmdET_S1_T0_RKT1_:
.LFB3248:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
movq %rdi, -8(%rbp)
movq %rsi, -16(%rbp)
movq %rdx, -24(%rbp)
movq -8(%rbp), %rax
movq %rax, %rdi
call _ZSt12__niter_baseIPdET_S1_
movq %rax, %rcx
movq -24(%rbp), %rdx
movq -16(%rbp), %rax
movq %rax, %rsi
movq %rcx, %rdi
call _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_
movq %rax, %rdx
leaq -8(%rbp), %rax
movq %rdx, %rsi
movq %rax, %rdi
call _ZSt12__niter_wrapIPdET_RKS1_S1_
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3248:
.size _ZSt6fill_nIPdmdET_S1_T0_RKT1_, .-_ZSt6fill_nIPdmdET_S1_T0_RKT1_
.section .text._ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm,"axG",@progbits,_ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm,comdat
.weak _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
.type _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm, @function
_ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm:
.LFB3249:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq %rsi, -16(%rbp)
movq -16(%rbp), %rax
leaq 0(,%rax,8), %rdx
movq -8(%rbp), %rax
addq %rdx, %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3249:
.size _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm, .-_ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
.section .text._ZNSt5arrayIdLm2EE4dataEv,"axG",@progbits,_ZNSt5arrayIdLm2EE4dataEv,comdat
.align 2
.weak _ZNSt5arrayIdLm2EE4dataEv
.type _ZNSt5arrayIdLm2EE4dataEv, @function
_ZNSt5arrayIdLm2EE4dataEv:
.LFB3250:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movq %rax, %rdi
call _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3250:
.size _ZNSt5arrayIdLm2EE4dataEv, .-_ZNSt5arrayIdLm2EE4dataEv
.section .text._ZSt12__niter_baseIPdET_S1_,"axG",@progbits,_ZSt12__niter_baseIPdET_S1_,comdat
.weak _ZSt12__niter_baseIPdET_S1_
.type _ZSt12__niter_baseIPdET_S1_, @function
_ZSt12__niter_baseIPdET_S1_:
.LFB3318:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3318:
.size _ZSt12__niter_baseIPdET_S1_, .-_ZSt12__niter_baseIPdET_S1_
.section .text._ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_,"axG",@progbits,_ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_,comdat
.weak _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_
.type _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_, @function
_ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_:
.LFB3319:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -24(%rbp)
movq %rsi, -32(%rbp)
movq %rdx, -40(%rbp)
movq -40(%rbp), %rax
movsd (%rax), %xmm0
movsd %xmm0, -8(%rbp)
movq -32(%rbp), %rax
movq %rax, -16(%rbp)
.L41:
cmpq $0, -16(%rbp)
je .L40
movq -24(%rbp), %rax
movsd -8(%rbp), %xmm0
movsd %xmm0, (%rax)
subq $1, -16(%rbp)
addq $8, -24(%rbp)
jmp .L41
.L40:
movq -24(%rbp), %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3319:
.size _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_, .-_ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_
.section .text._ZSt12__niter_wrapIPdET_RKS1_S1_,"axG",@progbits,_ZSt12__niter_wrapIPdET_RKS1_S1_,comdat
.weak _ZSt12__niter_wrapIPdET_RKS1_S1_
.type _ZSt12__niter_wrapIPdET_RKS1_S1_, @function
_ZSt12__niter_wrapIPdET_RKS1_S1_:
.LFB3320:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq %rsi, -16(%rbp)
movq -16(%rbp), %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3320:
.size _ZSt12__niter_wrapIPdET_RKS1_S1_, .-_ZSt12__niter_wrapIPdET_RKS1_S1_
.section .text._ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd,"axG",@progbits,_ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd,comdat
.weak _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd
.type _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd, @function
_ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd:
.LFB3321:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3321:
.size _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd, .-_ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd
.text
.type _Z41__static_initialization_and_destruction_0ii, @function
_Z41__static_initialization_and_destruction_0ii:
.LFB3455:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movl %edi, -4(%rbp)
movl %esi, -8(%rbp)
cmpl $1, -4(%rbp)
jne .L49
cmpl $65535, -8(%rbp)
jne .L49
leaq _ZStL8__ioinit(%rip), %rdi
call _ZNSt8ios_base4InitC1Ev@PLT
leaq __dso_handle(%rip), %rdx
leaq _ZStL8__ioinit(%rip), %rsi
movq _ZNSt8ios_base4InitD1Ev@GOTPCREL(%rip), %rax
movq %rax, %rdi
call __cxa_atexit@PLT
.L49:
nop
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3455:
.size _Z41__static_initialization_and_destruction_0ii, .-_Z41__static_initialization_and_destruction_0ii
.type _GLOBAL__sub_I_main, @function
_GLOBAL__sub_I_main:
.LFB3456:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl $65535, %esi
movl $1, %edi
call _Z41__static_initialization_and_destruction_0ii
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3456:
.size _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
.section .init_array,"aw"
.align 8
.quad _GLOBAL__sub_I_main
.section .rodata
.align 8
.LC1:
.long 2950821842
.long 1072018643
.align 8
.LC2:
.long 797100793
.long 1074900721
.hidden __dso_handle
.ident "GCC: (GNU) 9.2.0"
.section .note.GNU-stack,"",@progbits

使用 -O1 ( g++ -std=c++17 -Wall -pedantic -O1 -S -o test test.cpp ) 时的汇编程序转储是
    .file   "test.cpp"
.text
.section .rodata.str1.1,"aMS",@progbits,1
.LC1:
.string " "
.text
.globl main
.type main, @function
main:
.LFB2853:
.cfi_startproc
pushq %rbx
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
leaq _ZSt4cout(%rip), %rbx
movq _ZSt4cout(%rip), %rax
movq %rbx, %rcx
addq -24(%rax), %rcx
orl $260, 24(%rcx)
movsd .LC0(%rip), %xmm0
movq %rbx, %rdi
call _ZNSo9_M_insertIdEERSoT_@PLT
movq %rax, %rdi
movl $1, %edx
leaq .LC1(%rip), %rsi
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@PLT
movsd .LC2(%rip), %xmm0
movq %rbx, %rdi
call _ZNSo9_M_insertIdEERSoT_@PLT
movq %rax, %rdi
movl $1, %edx
leaq .LC1(%rip), %rsi
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@PLT
movq %rbx, %rdi
call _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_@PLT
movl $0, %eax
popq %rbx
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE2853:
.size main, .-main
.type _GLOBAL__sub_I_main, @function
_GLOBAL__sub_I_main:
.LFB3477:
.cfi_startproc
subq $8, %rsp
.cfi_def_cfa_offset 16
leaq _ZStL8__ioinit(%rip), %rdi
call _ZNSt8ios_base4InitC1Ev@PLT
leaq __dso_handle(%rip), %rdx
leaq _ZStL8__ioinit(%rip), %rsi
movq _ZNSt8ios_base4InitD1Ev@GOTPCREL(%rip), %rdi
call __cxa_atexit@PLT
addq $8, %rsp
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE3477:
.size _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
.section .init_array,"aw"
.align 8
.quad _GLOBAL__sub_I_main
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LC0:
.long 921391986
.long -1077349148
.align 8
.LC2:
.long 2858241566
.long -1075525036
.hidden __dso_handle
.ident "GCC: (GNU) 9.2.0"
.section .note.GNU-stack,"",@progbits

编辑二

根据要求,对使用 -O0 编译的可执行文件运行 objdump -d 的输出可以在 here 找到(由于长度限制,我无法在此处发布)。ldd 的输出表明该可执行文件链接到 /usr/lib/libm.so.6。objdump -d /bin/libm.so.6 输出的一部分(.init、.plt、sin 和 cos 节)可以在 here 找到。

最佳答案

使用 -O1 时,浮点计算发生在编译时,使用的是 GNU MPFR 库。即使对于 sin 和 cos 等函数,MPFR 也应当给出正确舍入的结果。你的数学库对这些函数可能有不同的精度目标,这就是为什么运行时计算(在 -O0 优化级别下)有时会给出不同的结果。例如,GNU C 库的一般精度目标是几个 ulp 以内(GNU C library has a general accuracy goal of a few ulp)。

据报道,IEEE 754 仅对数学库函数的一个子集(显然是 sqrt)有精度要求,这使得数学库能够在超越函数的速度和精度之间选择不同的权衡。 (不幸的是,我无法访问 IEEE 754,因为 IEEE 反对知识的公开传播。)

关于 c++ - -O1 改变浮点运算结果,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/58556044/

28 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com