gpt4 book ai didi

c - 为什么GCC在编译C代码时不使用更多寄存器

转载 作者:行者123 更新时间:2023-11-30 21:46:18 27 4
gpt4 key购买 nike

在 Ubuntu 上使用面向 ARM CPU 的 GCC 交叉编译器编译 C 代码时,生成的汇编中几乎每条指令都要先从栈中加载源操作数,再把结果写回栈。为什么 GCC 不使用更多的寄存器来减少加载/存储的开销?是不是因为 ARM 是加载/存储(load/store)架构,所以 GCC 才对每条指令的数据都采用先加载、后存储的方式?

以下是我的冒泡排序函数:

/*
 * Sort the first dataSize elements of data into ascending order, in place,
 * using bubble sort.
 *
 * Every pass scans the whole array (no early exit and no shrinking bound),
 * so dataSize - 1 passes of dataSize - 1 comparisons are always performed.
 * Nothing is read or written when dataSize is 1 or less.
 *
 * Always returns true.
 */
bool bubble_sort(int* data, int dataSize) {
    const int lastIndex = dataSize - 1;
    int pass = 0;

    while (pass < lastIndex) {
        int pos = 0;

        while (pos < lastIndex) {
            int *left = &data[pos];
            int *right = left + 1;

            /* Swap only on strict inequality, so equal neighbours stay put. */
            if (*right < *left) {
                int held = *right;
                *right = *left;
                *left = held;
            }
            pos++;
        }
        pass++;
    }

    return true;
}

以下是在 Ubuntu 上用 GCC 交叉编译工具链编译上述 C 代码后得到的反汇编结果:

00008d0c <bubble_sort>:
8d0c: e52db004 push {fp} ; (str fp, [sp, #-4]!)
8d10: e28db000 add fp, sp, #0
8d14: e24dd01c sub sp, sp, #28
8d18: e50b0018 str r0, [fp, #-24]
8d1c: e50b101c str r1, [fp, #-28]
8d20: e3a03000 mov r3, #0
8d24: e50b3010 str r3, [fp, #-16]
8d28: ea000032 b 8df8 <bubble_sort+0xec>
8d2c: e3a03000 mov r3, #0
8d30: e50b300c str r3, [fp, #-12]
8d34: ea000027 b 8dd8 <bubble_sort+0xcc>
8d38: e51b300c ldr r3, [fp, #-12]
8d3c: e1a03103 lsl r3, r3, #2
8d40: e51b2018 ldr r2, [fp, #-24]
8d44: e0823003 add r3, r2, r3
8d48: e5932000 ldr r2, [r3]
8d4c: e51b300c ldr r3, [fp, #-12]
8d50: e2833001 add r3, r3, #1
8d54: e1a03103 lsl r3, r3, #2
8d58: e51b1018 ldr r1, [fp, #-24]
8d5c: e0813003 add r3, r1, r3
8d60: e5933000 ldr r3, [r3]
8d64: e1520003 cmp r2, r3
8d68: da000017 ble 8dcc <bubble_sort+0xc0>
8d6c: e51b300c ldr r3, [fp, #-12]
8d70: e2833001 add r3, r3, #1
8d74: e1a03103 lsl r3, r3, #2
8d78: e51b2018 ldr r2, [fp, #-24]
8d7c: e0823003 add r3, r2, r3
8d80: e5933000 ldr r3, [r3]
8d84: e50b3008 str r3, [fp, #-8]
8d88: e51b300c ldr r3, [fp, #-12]
8d8c: e2833001 add r3, r3, #1
8d90: e1a03103 lsl r3, r3, #2
8d94: e51b2018 ldr r2, [fp, #-24]
8d98: e0823003 add r3, r2, r3
8d9c: e51b200c ldr r2, [fp, #-12]
8da0: e1a02102 lsl r2, r2, #2
8da4: e51b1018 ldr r1, [fp, #-24]
8da8: e0812002 add r2, r1, r2
8dac: e5922000 ldr r2, [r2]
8db0: e5832000 str r2, [r3]
8db4: e51b300c ldr r3, [fp, #-12]
8db8: e1a03103 lsl r3, r3, #2
8dbc: e51b2018 ldr r2, [fp, #-24]
8dc0: e0823003 add r3, r2, r3
8dc4: e51b2008 ldr r2, [fp, #-8]
8dc8: e5832000 str r2, [r3]
8dcc: e51b300c ldr r3, [fp, #-12]
8dd0: e2833001 add r3, r3, #1
8dd4: e50b300c str r3, [fp, #-12]
8dd8: e51b301c ldr r3, [fp, #-28]
8ddc: e2432001 sub r2, r3, #1
8de0: e51b300c ldr r3, [fp, #-12]
8de4: e1520003 cmp r2, r3
8de8: caffffd2 bgt 8d38 <bubble_sort+0x2c>
8dec: e51b3010 ldr r3, [fp, #-16]
8df0: e2833001 add r3, r3, #1
8df4: e50b3010 str r3, [fp, #-16]
8df8: e51b301c ldr r3, [fp, #-28]
8dfc: e2432001 sub r2, r3, #1
8e00: e51b3010 ldr r3, [fp, #-16]
8e04: e1520003 cmp r2, r3
8e08: caffffc7 bgt 8d2c <bubble_sort+0x20>
8e0c: e3a03001 mov r3, #1
8e10: e1a00003 mov r0, r3
8e14: e28bd000 add sp, fp, #0
8e18: e8bd0800 ldmfd sp!, {fp}
8e1c: e12fff1e bx lr

以下是我用内联汇编手写的版本,它使用更多寄存器,以减少加载/存储的次数:

/*
 * Bubble sort over an int array, hand-written as ARM inline assembly so that
 * the loop counters and element pointers live in registers.
 *
 * The asm statement has no operand list: it reads r0 and r1 directly.
 * NOTE(review): this presumably relies on the ARM calling convention leaving
 * `data` in r0 and `size` in r1 at this point; the compiler does not
 * guarantee that -- confirm, or pass them as asm operands.
 *
 * r0-r7 are pushed on entry and popped on exit, so the asm leaves those
 * registers as it found them. There is no clobber list.
 * NOTE(review): the asm stores to the array but declares no "memory" clobber,
 * and it changes the condition flags -- verify this is safe where it is used.
 *
 * NOTE(review): the labels are plain global names (loop1, loop2, ...); they
 * would presumably collide if this asm were emitted more than once.
 *
 * NOTE(review): both loops exit with BEQ on an exact match against
 * (size-1)*4. For size <= 0 that bound is negative, and counters that start
 * at 0 and step by 4 will not match it on the first test -- confirm callers
 * always pass size >= 1.
 *
 * Register use:
 *   r1 = (size-1)*4, loop bound in bytes
 *   r2 = i, outer counter, stepped by 4
 *   r3 = j, inner counter, stepped by 4
 *   r4 = address of the current element, r5 = address of the next element
 *   r6, r7 = the two values being compared
 */
void bubble_sort(int *data, int size) {
// r2 = i (outer counter, in steps of 4)
// r3 = j (inner counter, in steps of 4)
asm volatile(
" STMDB SP!, {r0-r7} \n\t" // push r0-r7 so they can be restored at the end
" "
" SUB r1, r1, #1 \n\t" // r1 = size-1
" LSL r1, r1, #2 \n\t" // r1 = (size-1)*4: scale by 4 bytes per int
" "
" MOV r2, #0 \n\t" // r2 = i = 0
" "
"loop1: \n\t"
" CMP r2, r1 \n\t" // i == (size-1)*4 ?
" BEQ loop_end \n\t" // yes: all passes done
" "
" MOV r3, #0 \n\t" // r3 = j = 0
" MOV r4, r0 \n\t" // r4 = data (address of first element)
" ADD r5, r4, #4 \n\t" // r5 = data + 4 bytes (address of second element)
" "
"loop2: \n\t"
" CMP r3, r1 \n\t" // j == (size-1)*4 ?
" BEQ loop1_end \n\t" // yes: this pass is done
" "
" LDR r6, [r4], #0 \n\t" // r6 = value at r4 (post-index by 0: r4 unchanged)
" LDR r7, [r5], #0 \n\t" // r7 = value at r5 (post-index by 0: r5 unchanged)
" CMP r6, r7 \n\t"
" BLE incr_addr \n\t" // r6 <= r7 (signed): already in order, skip the swap
" "
" STR r6, [r5], #4 \n\t" // swap: store r6 at r5, then r5 += 4
" STR r7, [r4], #4 \n\t" // store r7 at r4, then r4 += 4
" B loop2_end \n\t"
" "
"incr_addr: \n\t" // no-swap path: just advance both pointers
" ADD r4, r4, #4 \n\t" // r4 = r4 + 4
" ADD r5, r5, #4 \n\t" // r5 = r5 + 4
" "
"loop2_end: \n\t"
" ADD r3, r3, #4 \n\t" // j += 4
" B loop2 \n\t"
" "
"loop1_end: \n\t"
" ADD r2, r2, #4 \n\t" // i += 4
" B loop1 \n\t"
" "
"loop_end: \n\t"
" "
" LDMIA SP!,{r0-r7} " // pop r0-r7, undoing the push at the top
);
// The asm falls through to the end of the function to return to the caller;
// an explicit "BX lr" inside the asm was left commented out.
}

最佳答案

使用 -O3 优化选项编译时,GCC 就会使用更多寄存器。未指定优化级别时默认为 -O0,此时 GCC 会把每个局部变量都保存在栈上,并在每条语句中重新加载和写回,以方便调试;这与 ARM 是否为加载/存储架构无关。

00008e10 <bubble_sort>:
8e10: e92d0030 push {r4, r5}
8e14: e2414001 sub r4, r1, #1
8e18: e3540000 cmp r4, #0
8e1c: da00000d ble 8e58 <bubble_sort+0x48>
8e20: e080c101 add ip, r0, r1, lsl #2
8e24: e2805004 add r5, r0, #4
8e28: e3a00000 mov r0, #0
8e2c: e1a03005 mov r3, r5
8e30: e5131004 ldr r1, [r3, #-4]
8e34: e4932004 ldr r2, [r3], #4
8e38: e1510002 cmp r1, r2
8e3c: c5031004 strgt r1, [r3, #-4]
8e40: c5032008 strgt r2, [r3, #-8]
8e44: e153000c cmp r3, ip
8e48: 1afffff8 bne 8e30 <bubble_sort+0x20>
8e4c: e2800001 add r0, r0, #1
8e50: e1500004 cmp r0, r4
8e54: 1afffff4 bne 8e2c <bubble_sort+0x1c>
8e58: e3a00001 mov r0, #1
8e5c: e8bd0030 pop {r4, r5}
8e60: e12fff1e bx lr

关于c - 为什么GCC在编译C代码时不使用更多寄存器,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/31573651/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com