gpt4 book ai didi

c++ - 超过 5 个参数的 rope 表达式模板无法正确内联

转载 作者:塔克拉玛干 更新时间:2023-11-03 00:48:53 27 4
gpt4 key购买 nike

我一直在尝试把表达式模板(expression templates)与 rope(绳索)数据结构结合起来,看看能获得多大的收益。到目前为止,效果很好。但是,当把超过 5 个参数连接在一起时,编译器无法正确优化,并生成了不必要的临时变量。有人能告诉我为什么会这样吗?这是编译器的限制,还是我的优化选项需要调整?

我正在使用具有以下选项的 g++ 4.4.1 (mingw32):-O3 -Winline -Wextra -Wall -fno-exceptions -fno-rtti -fomit-frame-pointer -fexpensive-optimizations -fverbose-asm -S

代码如下,只是一个实验,没有真正遵循任何标准:

#include <stdio.h>

/**
 * CRTP base for rope expression nodes.
 *
 * A node type passes itself as `Derived`; code that only holds a
 * `rope_base<Derived>` reference can then get back to the concrete node
 * through ref().
 */
template<class Derived>
struct rope_base {
    /** Returns this object viewed as its concrete `Derived` type. */
    Derived const & ref() const;
};

/**
 * NUL-terminated character buffer built by evaluating a rope expression.
 *
 * The object owns the array pointed to by m_value: the constructor
 * allocates it with new[] and the destructor releases it with delete[].
 * Because of that ownership, copying is disabled (see the private section).
 */
struct string {
    // Declaration order matters: the constructor sizes m_value from m_length.
    size_t m_length;  // character count, not including the trailing NUL
    char * m_value;   // owned buffer holding m_length + 1 bytes

    /** Evaluates `rope` into a newly allocated buffer. */
    template<typename Derived>
    string(const rope_base<Derived> & rope);

    /** Releases the owned buffer. */
    ~string();

    /** Returns the NUL-terminated character data. */
    const char * data() const;

    /** Returns the number of characters, excluding the trailing NUL. */
    size_t length() const;

    // NOTE(review): no definition of write_to is visible in this file; confirm
    // one exists before using a string as a rope operand.
    char * write_to(char * dst) const;

private:
    // Copying is disabled: a member-wise copy would leave two objects holding
    // the same m_value, and both destructors would delete[] it. Declared but
    // deliberately left undefined (the pre-C++11 non-copyable idiom).
    string(const string &);
    string & operator=(const string &);
};

/**
 * Non-owning view of a C string paired with its length.
 *
 * The length is computed once at construction; the pointed-to characters
 * are borrowed and never released by this type.
 */
struct static_string {
    // Keep m_value ahead of m_length: members are initialised in this order.
    char const * m_value;   // borrowed pointer to the characters
    size_t const m_length;  // number of characters before the terminating NUL

    /** Wraps `value`; left implicit so a plain C string converts on its own. */
    static_string(char const * value);

    /** Returns the cached character count. */
    size_t length() const;

    /** Copies the characters to `dst` and returns the position just past them. */
    char * write_to(char * dst) const;
};

/**
 * Selects how a rope node stores an operand of type T.
 *
 * The primary template stores the operand by (const) value.
 */
template<class T>
struct rope_traits {
    typedef T const type;
};

/** Storage policy for `string` operands: held by const reference, not copied. */
template<>
struct rope_traits<string> {
    typedef string const & type;
};

/** Storage policy for `static_string` operands: held by const reference, not copied. */
template<>
struct rope_traits<static_string> {
    typedef static_string const & type;
};

/**
 * Expression node representing the concatenation of two operands.
 *
 * Each operand is stored as dictated by rope_traits (by value or by const
 * reference). Nothing is copied into a buffer until write_to() is called.
 */
template<class Left, class Right>
struct rope : rope_base<rope<Left, Right> > {
    typename rope_traits<Left>::type m_left;    // left-hand operand
    typename rope_traits<Right>::type m_right;  // right-hand operand

    /** Builds a node from its two operands. */
    rope(const Left & left, const Right & right);

    /** Returns the combined length of both operands. */
    size_t length() const;

    /** Writes left then right to `dst`; returns the position just past the output. */
    char * write_to(char * dst) const;
};

/** Stores the pointer and measures the string once, up front. */
inline static_string::static_string(const char * value)
    : m_value(value),
      m_length(__builtin_strlen(value)) {
}

inline size_t static_string::length() const {
return m_length;
}

/**
 * Copies the m_length characters (without a terminating NUL) to `dst`.
 *
 * @return the position in `dst` immediately after the copied characters.
 */
inline char * static_string::write_to(char * dst) const {
    char * const past_end = dst + m_length;
    __builtin_memcpy(dst, m_value, m_length);
    return past_end;
}

/**
 * Evaluates a rope expression into a newly allocated buffer.
 *
 * The rope's total length is queried first, a buffer one byte larger is
 * allocated, and the rope then writes itself into it.
 */
template<typename Derived>
inline string::string(const rope_base<Derived> & rope)
    : m_length(rope.ref().length()),
      m_value(new char[m_length + 1]) {
    // write_to() returns the position just past the written characters,
    // which is where the terminating NUL belongs.
    char * const terminator = rope.ref().write_to(m_value);
    *terminator = 0;
}

/** Frees the buffer allocated with new[] by the constructor. */
inline string::~string() {
    delete [] this->m_value;
}

inline const char * string::data() const {
return m_value;
}

inline size_t string::length() const {
return m_length;
}

/** Downcasts this base sub-object to the concrete `Derived` node type. */
template<typename Derived>
inline const Derived & rope_base<Derived>::ref() const {
    const Derived * const self = static_cast<const Derived *>(this);
    return *self;
}

/** Captures both operands; how each is stored is decided by rope_traits. */
template<typename Left, typename Right>
inline rope<Left, Right>::rope(const Left & left, const Right & right)
    : m_left(left),
      m_right(right) {
}

/** Returns the sum of the lengths of the left and right operands. */
template<typename Left, typename Right>
inline size_t rope<Left, Right>::length() const {
    const size_t left_length = m_left.length();
    const size_t right_length = m_right.length();
    return left_length + right_length;
}

/**
 * Writes the left operand to `dst`, then the right operand directly after it.
 *
 * @return the position just past everything written.
 */
template<typename Left, typename Right>
inline char * rope<Left, Right>::write_to(char * dst) const {
    char * const after_left = m_left.write_to(dst);
    return m_right.write_to(after_left);
}

/** Concatenates two static strings into a rope node; no characters are copied. */
inline rope<static_string, static_string> operator+(const static_string & left, const static_string & right) {
    typedef rope<static_string, static_string> node_type;
    return node_type(left, right);
}

/** Appends a static string to an existing rope expression. */
template<typename Left>
inline rope<Left, static_string> operator+(const rope_base<Left> & left, const static_string & right) {
    typedef rope<Left, static_string> node_type;
    const Left & lhs = left.ref();
    return node_type(lhs, right);
}

/** Prepends a static string to an existing rope expression. */
template<typename Right>
inline rope<static_string, Right> operator+(const static_string & left, const rope_base<Right> & right) {
    typedef rope<static_string, Right> node_type;
    const Right & rhs = right.ref();
    return node_type(left, rhs);
}

/** Joins two rope expressions into a new rope node. */
template<typename Left, typename Right>
inline rope<Left, Right> operator+(const rope_base<Left> & left, const rope_base<Right> & right) {
    typedef rope<Left, Right> node_type;
    const Left & lhs = left.ref();
    const Right & rhs = right.ref();
    return node_type(lhs, rhs);
}

// Short alias so the first operand of a concatenation can be wrapped tersely.
typedef static_string ss;

/**
 * Builds one string from a five-operand concatenation and prints its length
 * and contents.
 */
int main(int, char **)
{
    // works up to 5
    string s(ss("111111111111") + "222222222222" + "333333333333" + "444444444444" + "555555555555");
    // length() returns size_t but "%d" consumes an int; passing a size_t
    // directly is undefined behaviour where the two types differ in size,
    // so convert explicitly. The format string itself is left untouched.
    printf("%d %s\n", static_cast<int>(s.length()), s.data());
    return 0;
}

上面的代码生成了相当不错的汇编输出,它是完全内联的,所有参数都被简化为常量:

.def    ___main;    .scl    2;    .type    32;    .endef
.section .rdata,"dr"
LC0:
.ascii "444444444444\0"
LC1:
.ascii "333333333333\0"
LC2:
.ascii "222222222222\0"
LC3:
.ascii "111111111111\0"
LC4:
.ascii "555555555555\0"
LC5:
.ascii "%d %s\12\0"
.text
.p2align 2,,3
.globl _main
.def _main; .scl 2; .type 32; .endef
_main:
pushl %ebp #
movl %esp, %ebp #,
andl $-16, %esp #,
pushl %edi #
pushl %esi #
pushl %ebx #
subl $20, %esp #,
call ___main #
movl $LC3, %esi #, D.2495
movl $61, (%esp) #,
call __Znaj #
movl %eax, %ebx #, D.3126
movl $3, %ecx #, tmp74
movl %eax, %edi # D.3126, D.3125
rep movsl
leal 12(%eax), %eax #, D.3180
movb $3, %cl #,
movl %eax, %edi # D.3180, D.3180
movl $LC2, %esi #, D.2496
rep movsl
leal 24(%ebx), %eax #, D.3186
movb $3, %cl #,
movl %eax, %edi # D.3186, D.3186
movl $LC1, %esi #, D.2502
rep movsl
leal 36(%ebx), %eax #, D.3192
movb $3, %cl #,
movl %eax, %edi # D.3192, D.3192
movl $LC0, %esi #, D.2539
rep movsl
leal 48(%ebx), %eax #, D.3198
movl $LC4, %esi #, tmp87
movb $3, %cl #,
movl %eax, %edi # D.3198, D.3198
rep movsl
movb $0, 12(%eax) #,
movl %ebx, 8(%esp) # D.3126,
movl $60, 4(%esp) #,
movl $LC5, (%esp) #,
call _printf #
testl %ebx, %ebx # D.3126
je L2 #,
movl %ebx, (%esp) # D.3126,
call __ZdaPv #
L2:
xorl %eax, %eax #
addl $20, %esp #,
popl %ebx #
popl %esi #
popl %edi #
leave
ret
.def __Znaj; .scl 2; .type 32; .endef
.def _printf; .scl 2; .type 32; .endef
.def __ZdaPv; .scl 2; .type 32; .endef

向连接添加一个或多个参数时,内联失败,导致临时变量被复制,参数被视为变量:

    .def    ___main;    .scl    2;    .type    32;    .endef
.section .rdata,"dr"
LC0:
.ascii "777777777777\0"
LC1:
.ascii "666666666666\0"
LC2:
.ascii "555555555555\0"
LC3:
.ascii "444444444444\0"
LC4:
.ascii "333333333333\0"
LC5:
.ascii "222222222222\0"
LC6:
.ascii "111111111111\0"
LC7:
.ascii "888888888888\0"
LC8:
.ascii "%d %s\12\0"
.text
.p2align 2,,3
.globl _main
.def _main; .scl 2; .type 32; .endef
_main:
pushl %ebp #
movl %esp, %ebp #,
andl $-16, %esp #,
pushl %edi #
pushl %esi #
pushl %ebx #
subl $228, %esp #,
call ___main #
movl $LC0, 168(%esp) #, D.2650.m_value
movl $12, 172(%esp) #, D.2650.m_length
movl $LC1, 176(%esp) #, D.2613.m_value
movl $12, 180(%esp) #, D.2613.m_length
movl $LC2, 184(%esp) #, D.2576.m_value
movl $12, 188(%esp) #, D.2576.m_length
movl $LC3, 192(%esp) #, D.2539.m_value
movl $12, 196(%esp) #, D.2539.m_length
movl $LC4, 200(%esp) #, D.2502.m_value
movl $12, 204(%esp) #, D.2502.m_length
movl $LC5, 208(%esp) #, D.2496.m_value
movl $12, 212(%esp) #, D.2496.m_length
movl $LC6, 216(%esp) #, D.2495.m_value
movl $12, 220(%esp) #, D.2495.m_length
leal 216(%esp), %eax #, tmp78
movl %eax, 152(%esp) # tmp78, D.2571.m_left.m_left.m_left
leal 208(%esp), %eax #, tmp79
movl %eax, 156(%esp) # tmp79, D.2571.m_left.m_left.m_right
leal 200(%esp), %eax #, tmp80
movl %eax, 160(%esp) # tmp80, D.2571.m_left.m_right
leal 192(%esp), %eax #, tmp81
movl %eax, 164(%esp) # tmp81, D.2571.m_right
leal 132(%esp), %edi #, tmp82
leal 152(%esp), %esi #, tmp83
movl $4, %ecx #, tmp84
rep movsl
leal 184(%esp), %eax #, tmp85
movl %eax, 148(%esp) # tmp85, D.2608.m_right
leal 108(%esp), %edi #, tmp86
leal 132(%esp), %esi #, tmp87
movb $5, %cl #,
rep movsl
leal 176(%esp), %eax #, tmp89
movl %eax, 128(%esp) # tmp89, D.2645.m_right
leal 80(%esp), %edi #, tmp90
leal 108(%esp), %esi #, tmp91
movb $6, %cl #,
rep movsl
leal 168(%esp), %eax #, tmp93
movl %eax, 104(%esp) # tmp93, D.2682.m_right
leal 48(%esp), %edi #, tmp94
leal 80(%esp), %esi #, tmp95
movb $7, %cl #,
rep movsl
movl 48(%esp), %ebx # D.2719.m_left.m_left.m_left.m_left.m_left.m_left.m_left, SR.35
movl 52(%esp), %edx # D.2719.m_left.m_left.m_left.m_left.m_left.m_left.m_right, SR.34
movl 56(%esp), %eax # D.2719.m_left.m_left.m_left.m_left.m_left.m_right,
movl %eax, 36(%esp) #, %sfp
movl 60(%esp), %eax # D.2719.m_left.m_left.m_left.m_left.m_right,
movl %eax, 32(%esp) #, %sfp
movl 64(%esp), %eax # D.2719.m_left.m_left.m_left.m_right,
movl %eax, 28(%esp) #, %sfp
movl 68(%esp), %eax # D.2719.m_left.m_left.m_right,
movl %eax, 24(%esp) #, %sfp
movl 72(%esp), %eax # D.2719.m_left.m_right,
movl %eax, 20(%esp) #, %sfp
movl 4(%ebx), %eax # <variable>.m_length, tmp97
addl 4(%edx), %eax # <variable>.m_length, tmp97
addl $12, %eax #,
movl %eax, 44(%esp) #, %sfp
movl 36(%esp), %eax # %sfp,
movl 4(%eax), %eax # <variable>.m_length,
addl %eax, 44(%esp) #, %sfp
movl 32(%esp), %eax # %sfp,
movl 4(%eax), %eax # <variable>.m_length,
addl %eax, 44(%esp) #, %sfp
movl 28(%esp), %eax # %sfp,
movl 4(%eax), %eax # <variable>.m_length,
addl %eax, 44(%esp) #, %sfp
movl 24(%esp), %eax # %sfp,
movl 4(%eax), %eax # <variable>.m_length,
addl %eax, 44(%esp) #, %sfp
movl 20(%esp), %eax # %sfp,
movl 4(%eax), %eax # <variable>.m_length,
addl %eax, 44(%esp) #, %sfp
movl 44(%esp), %eax # %sfp, tmp105
incl %eax # tmp105
movl %eax, (%esp) # tmp105,
movl %edx, 16(%esp) #,
call __Znaj #
movl %eax, 40(%esp) #, %sfp
movl (%ebx), %esi # <variable>.m_value, <variable>.m_value
movl 4(%ebx), %ecx # <variable>.m_length, <variable>.m_length
movl %eax, %edi #, D.3662
rep movsb
movl 40(%esp), %eax # %sfp, D.3735
addl 4(%ebx), %eax # <variable>.m_length, D.3735
movl 16(%esp), %edx #,
movl (%edx), %esi # <variable>.m_value, <variable>.m_value
movl 4(%edx), %ecx # <variable>.m_length, <variable>.m_length
movl %eax, %edi # D.3735, D.3735
rep movsb
addl 4(%edx), %eax # <variable>.m_length, D.3741
movl 36(%esp), %edx # %sfp,
movl (%edx), %esi # <variable>.m_value, <variable>.m_value
movl 4(%edx), %ecx # <variable>.m_length, <variable>.m_length
movl %eax, %edi # D.3741, D.3741
rep movsb
addl 4(%edx), %eax # <variable>.m_length, D.3747
movl 32(%esp), %edx # %sfp,
movl (%edx), %esi # <variable>.m_value, <variable>.m_value
movl 4(%edx), %ecx # <variable>.m_length, <variable>.m_length
movl %eax, %edi # D.3747, D.3747
rep movsb
addl 4(%edx), %eax # <variable>.m_length, D.3753
movl 28(%esp), %edx # %sfp,
movl (%edx), %esi # <variable>.m_value, <variable>.m_value
movl 4(%edx), %ecx # <variable>.m_length, <variable>.m_length
movl %eax, %edi # D.3753, D.3753
rep movsb
addl 4(%edx), %eax # <variable>.m_length, D.3759
movl 24(%esp), %edx # %sfp,
movl (%edx), %esi # <variable>.m_value, <variable>.m_value
movl 4(%edx), %ecx # <variable>.m_length, <variable>.m_length
movl %eax, %edi # D.3759, D.3759
rep movsb
addl 4(%edx), %eax # <variable>.m_length, D.3765
movl 20(%esp), %edx # %sfp,
movl (%edx), %esi # <variable>.m_value, <variable>.m_value
movl 4(%edx), %ecx # <variable>.m_length, <variable>.m_length
movl %eax, %edi # D.3765, D.3765
rep movsb
addl 4(%edx), %eax # <variable>.m_length, D.3771
movl $LC7, %esi #, tmp148
movb $3, %cl #,
movl %eax, %edi # D.3771, D.3771
rep movsl
movb $0, 12(%eax) #,
movl 40(%esp), %eax # %sfp,
movl %eax, 8(%esp) #,
movl 44(%esp), %edx # %sfp,
movl %edx, 4(%esp) #,
movl $LC8, (%esp) #,
call _printf #
movl 40(%esp), %eax # %sfp,
testl %eax, %eax #
je L2 #,
movl 40(%esp), %eax # %sfp,
movl %eax, (%esp) #,
call __ZdaPv #
L2:
xorl %eax, %eax #
addl $228, %esp #,
popl %ebx #
popl %esi #
popl %edi #
leave
ret
.def __Znaj; .scl 2; .type 32; .endef
.def _printf; .scl 2; .type 32; .endef
.def __ZdaPv; .scl 2; .type 32; .endef

最佳答案

一些事情(根据评论中的讨论):

升级你的 gcc。 4.6 系列中做了很多优化改进,包括对内联的一些改进。

使用较新的 gcc -Winline 会警告您的字符串构造函数不会被内联:

warning: inlining failed in call to 'string::string(const rope_base<Derived>&) [with Derived = rope<rope<rope<rope<rope<static_string, static_string>, static_string>, static_string>, static_string>, static_string>]': call is unlikely and code size would grow [-Winline]

我不太确定 gcc 为什么会给出这条特定的消息(这类消息通常明显与"不太可能执行的分支"中的内联调用有关),但问题的根源在于该构造函数中调用了 new。改用静态的固定大小缓冲区可以生成漂亮而紧凑的汇编代码。改用 malloc,或者把 new 包装在一个非内联函数中,都能让构造函数被内联(但 malloc/该包装函数本身不会被内联)。

具体采用哪种方法取决于您的具体用例。如果在编译时可以知道字符串的长度,或者给定一个最大值,如您的示例所示,那么您显然可以完全内联。但在一般情况下,您总是不得不放弃一些东西。

关于c++ - 超过 5 个参数的 rope 表达式模板无法正确内联,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/14704131/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com