c++ - std::vector copying manually instead of calling memcpy when C++11/14 is enabled

Using gcc 4.9, cross-compiling for ARM with a Linaro toolchain, I found that the code generated for vector.assign() changes when -std=c++14 is added, in a way that causes a serious performance problem.

I have tried several different ways of doing this allocate + copy, but as long as I use a std::vector to do it, they all show this performance problem.
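
For illustration, here are a few std::vector-based ways of expressing the allocate + copy (a sketch; the question does not list the exact variants tried). All of them go through the same std::uninitialized_copy machinery in libstdc++, which is consistent with the observation that every vector-based variant shows the problem:

#include <stdint.h>
#include <stddef.h>
#include <vector>

void CopyVariants(uint8_t const* pData, size_t length)
{
    // Range constructor
    std::vector<uint8_t> a(pData, pData + length);

    // assign(), as in the toy example below
    std::vector<uint8_t> b;
    b.assign(pData, pData + length);

    // insert() at the end of an empty vector
    std::vector<uint8_t> c;
    c.insert(c.end(), pData, pData + length);
}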

I can reproduce the problem with this toy example:

VectorTest.h

#include <stdint.h>
#include <stddef.h>
#include <vector>

struct VectorWrapper_t
{
    VectorWrapper_t(uint8_t const* pData, size_t length);
    std::vector<uint8_t> data;
};

VectorTest.cpp

#include "VectorTest.h"

VectorWrapper_t::VectorWrapper_t(uint8_t const* pData, size_t length)
{
    data.assign(pData, pData + length);
}

gcc flags:

-std=c++14 \
-mthumb -march=armv7-a -mtune=cortex-a9 \
-mlittle-endian -mfloat-abi=hard -mfpu=neon -Wa,-mimplicit-it=thumb \
-O2 -g

Looking at the assembly, I can see why: the original version (C++03, I assume?) calls memmove, whereas the C++14 version adds an extra loop that appears to copy the data manually. Judging by the .loc labels gcc adds with -fverbose-asm, the instructions in this loop come from stl_construct.h and stl_uninitialized.h.
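
To make the two code paths concrete, here is a simplified sketch of the kind of compile-time dispatch std::uninitialized_copy performs in stl_uninitialized.h. The names and structure are invented for illustration and are not the actual libstdc++ source; the real implementation dispatches through internal class templates and, in C++11 mode, additionally checks assignability, which is the check that misfires here:

#include <algorithm>
#include <iterator>
#include <memory>
#include <new>
#include <type_traits>

// Slow path: construct each element in place, one at a time.  This is the
// per-byte loop visible in the C++14 / gcc 4.9 assembly below.
template <typename In, typename Out>
Out UninitCopyImpl(In first, In last, Out result, std::false_type /*bitwise*/)
{
    Out cur = result;
    for (; first != last; ++first, ++cur)
        ::new (static_cast<void*>(std::addressof(*cur)))
            typename std::iterator_traits<Out>::value_type(*first);
    return cur;
}

// Fast path: plain std::copy, which collapses to memmove/memcpy for
// contiguous ranges of trivially copyable types.
template <typename In, typename Out>
Out UninitCopyImpl(In first, In last, Out result, std::true_type /*bitwise*/)
{
    return std::copy(first, last, result);
}

template <typename In, typename Out>
Out SketchUninitializedCopy(In first, In last, Out result)
{
    typedef typename std::iterator_traits<In>::value_type  V1;
    typedef typename std::iterator_traits<Out>::value_type V2;
    // The real C++11 code path also tests assignability; when that test
    // wrongly reports "not assignable" (the GCC 4.9.2 bug discussed in the
    // answer), trivial types like uint8_t are sent down the slow path above.
    typedef std::integral_constant<bool,
        std::is_trivial<V1>::value && std::is_trivial<V2>::value> Bitwise;
    return UninitCopyImpl(first, last, result, Bitwise());
}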

Switching to gcc 5.2.1 (with C++14), the result compiles almost identically to the C++03 example, except that it calls memcpy instead of memmove.

I can work around this by using std::unique_ptr<uint8_t[]> instead of a vector here. However, I want to get to the bottom of the issue, to determine whether other places that use vectors may have performance problems and how to fix them (upgrading to gcc 5.2 is not practical).
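
A minimal sketch of the unique_ptr-based workaround mentioned above (the wrapper name and members here are illustrative, not taken from the question):

#include <stdint.h>
#include <stddef.h>
#include <cstring>
#include <memory>

// Same interface as VectorWrapper_t, but the buffer is a plain array owned
// by std::unique_ptr, so the copy is an explicit memcpy and cannot be turned
// into a per-element construction loop by the library.
struct BufferWrapper_t
{
    BufferWrapper_t(uint8_t const* pData, size_t length)
        : data(new uint8_t[length]), size(length)
    {
        if (length != 0)
            std::memcpy(data.get(), pData, length);
    }

    std::unique_ptr<uint8_t[]> data;
    size_t size;
};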

So my question is: why does this compile differently under C++11/14?

For reference, gcc --version reports:
arm-linux-gnueabihf-gcc (Linaro GCC 4.9-2014.12) 4.9.3 20141205 (prerelease).

Here is the assembly generated by gcc:

# C++03, gcc 4.9

push {r3, r4, r5, r6, r7, lr} @
movs r3, #0 @ tmp118,
mov r4, r0 @ this, this
str r3, [r0] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_start
mov r5, r2 @ length, length
str r3, [r0, #4] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_finish
str r3, [r0, #8] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_end_of_storage
cbnz r2, .L19 @ length,
mov r0, r4 @, this
pop {r3, r4, r5, r6, r7, pc} @
.L19:
mov r0, r2 @, length
mov r6, r1 @ pData, pData
bl _Znwj @
mov r2, r5 @, length
mov r1, r6 @, pData
mov r7, r0 @ D.13516,
bl memmove @
ldr r0, [r4] @ D.13515, MEM[(struct vector *)this_1(D)].D.11902._M_impl._M_start
cbz r0, .L3 @ D.13515,
bl _ZdlPv @
.L3:
add r5, r5, r7 @ D.13515, D.13516
str r7, [r4] @ D.13516, MEM[(struct vector *)this_1(D)].D.11902._M_impl._M_start
str r5, [r4, #4] @ D.13515, MEM[(struct vector *)this_1(D)].D.11902._M_impl._M_finish
mov r0, r4 @, this
str r5, [r4, #8] @ D.13515, MEM[(struct vector *)this_1(D)].D.11902._M_impl._M_end_of_storage
pop {r3, r4, r5, r6, r7, pc} @
.L6:
ldr r0, [r4] @ D.13515, MEM[(struct _Vector_base *)this_1(D)]._M_impl._M_start
cbz r0, .L5 @ D.13515,
bl _ZdlPv @
.L5:
bl __cxa_end_cleanup @

# C++14, gcc 4.9

push {r3, r4, r5, r6, r7, lr} @
movs r3, #0 @ tmp157,
mov r6, r0 @ this, this
str r3, [r0] @ tmp157, MEM[(struct _Vector_impl *)this_1(D)]._M_start
mov r5, r2 @ length, length
str r3, [r0, #4] @ tmp157, MEM[(struct _Vector_impl *)this_1(D)]._M_finish
str r3, [r0, #8] @ tmp157, MEM[(struct _Vector_impl *)this_1(D)]._M_end_of_storage
cbnz r2, .L25 @ length,
mov r0, r6 @, this
pop {r3, r4, r5, r6, r7, pc} @
.L25:
mov r0, r2 @, length
mov r4, r1 @ pData, pData
bl _Znwj @
adds r3, r4, r5 @ D.20345, pData, length
mov r7, r0 @ __result,
cmp r4, r3 @ pData, D.20345
ittt ne
addne r1, r4, #-1 @ ivtmp.76, pData,
movne r3, r0 @ __result, __result
addne r4, r0, r5 @ D.20346, __result, length
beq .L26 @,
.L7:
ldrb r2, [r1, #1]! @ zero_extendqisi2 @ D.20348, MEM[base: _48, offset: 0]
cbz r3, .L6 @ __result,
strb r2, [r3] @ D.20348, MEM[base: __result_23, offset: 0B]
.L6:
adds r3, r3, #1 @ __result, __result,
cmp r3, r4 @ __result, D.20346
bne .L7 @,
.L8:
ldr r0, [r6] @ D.20346, MEM[(struct vector *)this_1(D)].D.18218._M_impl._M_start
cbz r0, .L5 @ D.20346,
bl _ZdlPv @
.L5:
str r7, [r6] @ __result, MEM[(struct vector *)this_1(D)].D.18218._M_impl._M_start
mov r0, r6 @, this
str r4, [r6, #4] @ D.20346, MEM[(struct vector *)this_1(D)].D.18218._M_impl._M_finish
str r4, [r6, #8] @ D.20346, MEM[(struct vector *)this_1(D)].D.18218._M_impl._M_end_of_storage
pop {r3, r4, r5, r6, r7, pc} @
.L26:
adds r4, r0, r5 @ D.20346, __result, length
b .L8 @
.L11:
ldr r0, [r6] @ D.20346, MEM[(struct _Vector_base *)this_1(D)]._M_impl._M_start
cbz r0, .L10 @ D.20346,
bl _ZdlPv @
.L10:
bl __cxa_end_cleanup @

# C++14, gcc 5.2

push {r3, r4, r5, r6, r7, lr} @
movs r3, #0 @ tmp118,
mov r4, r0 @ this, this
str r3, [r0] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_start
str r3, [r0, #4] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_finish
str r3, [r0, #8] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_end_of_storage
cbnz r2, .L19 @ length,
mov r0, r4 @, this
pop {r3, r4, r5, r6, r7, pc} @
.L19:
mov r0, r2 @, length
mov r6, r1 @ pData, pData
mov r5, r2 @ length, length
bl _Znwj @
mov r2, r5 @, length
mov r1, r6 @, pData
mov r7, r0 @ D.20824,
bl memcpy @
ldr r0, [r4] @ D.20823, MEM[(struct vector *)this_1(D)].D.18751._M_impl._M_start
cbz r0, .L3 @ D.20823,
bl _ZdlPv @
.L3:
add r5, r5, r7 @ D.20823, D.20824
str r7, [r4] @ D.20824, MEM[(struct vector *)this_1(D)].D.18751._M_impl._M_start
str r5, [r4, #4] @ D.20823, MEM[(struct vector *)this_1(D)].D.18751._M_impl._M_finish
mov r0, r4 @, this
str r5, [r4, #8] @ D.20823, MEM[(struct vector *)this_1(D)].D.18751._M_impl._M_end_of_storage
pop {r3, r4, r5, r6, r7, pc} @
.L6:
ldr r0, [r4] @ D.20823, MEM[(struct _Vector_base *)this_1(D)]._M_impl._M_start
cbz r0, .L5 @ D.20823,
bl _ZdlPv @
.L5:
bl __cxa_end_cleanup @

Best Answer

This is a GCC bug in the 4.9.2 release, see PR 64476. The difference between the default -std=gnu++03 mode and -std=c++14 is that in C++11 and later a trivial type can be non-assignable (because it can have a deleted assignment operator), which makes the implementation of std::uninitialized_copy take a different (slower) code path. The assignability check was wrong, which meant the slow path was taken even when it wasn't needed.
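
To make "trivial yet not assignable" concrete, here is a small example (the type name is invented for illustration):

#include <type_traits>

// In C++11 a type can still be copied by its (trivial) copy constructor even
// though its copy assignment operator is deleted.  std::uninitialized_copy is
// specified in terms of construction, but libstdc++'s fast path replaces it
// with std::copy (assignment), so the library must verify assignability
// before taking that path; the GCC 4.9.2 bug was in that check.
struct TrivialButNotAssignable
{
    int value;
    TrivialButNotAssignable& operator=(TrivialButNotAssignable const&) = delete;
};

static_assert(std::is_trivially_copy_constructible<TrivialButNotAssignable>::value,
              "copy construction is still trivial");
static_assert(!std::is_copy_assignable<TrivialButNotAssignable>::value,
              "but assignment is deleted");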

I fixed it for GCC 4.9.3 two years ago, but your compiler is based on a snapshot taken between the 4.9.2 and 4.9.3 releases, a few weeks too early to include the fix.

You could ask Linaro to update their GCC 4.9 compiler to 4.9.4, or at least to apply the patch that fixes this bug.

Regarding c++ - std::vector<uint8_t> copying manually instead of calling memcpy when C++11/14 is enabled, a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/41989891/
