gpt4 book ai didi

c - 在thumb模式下编译时的GCC arm指令模式

转载 作者:行者123 更新时间:2023-12-03 14:07:50 43 4
gpt4 key购买 nike

我想知道:使用 --with-mode=thumb 配置的 GCC,在未指定 -marm 标志的情况下,如何处理包含 ARM 模式段的代码的编译/汇编。也就是说:

  • GCC 是用 --with-mode=thumb 编译的
  • 一个程序在没有指定 -marm 的情况下被编译(默认为 Thumb 模式)
  • 该程序的汇编部分使用 ARM 模式

  • 我尝试在运行 Ubuntu 18.04.4(内核 5.3.0-1018-raspi2)的 Raspberry Pi 4 上编译一个小测试程序,并注意到 .arm 段正在以 16 位 Thumb 指令模式执行,这促使我对此进行调查。这自然会导致段错误,因为程序计数器每次只增加 2 个字节而不是 4 个字节。

    下面是我的程序跳转进入 .arm 汇编代码、并且我执行了一条 stepi 命令之后,gdb 在 layout asm 模式下显示的内容:
    0x400900 <asm_maxfilter>        push   {r4, lr}
    0x400904 <asm_maxfilter+4> mov r3, #0
    0x400908 <filter_loop> vld1.8 {d0-d1}, [r0]

    pc 0x400902 0x400902 <asm_maxfilter+2>
    ^ The program counter is between instructions

    我的代码如下:
    #include <arm_neon.h>
    #include <stdlib.h>
    #include <string.h>
    #include <stdio.h>

    void asm_maxfilter(unsigned char* upbuffer, unsigned char* longterm_buffer, int grid_size);

    /*
     * Test driver for asm_maxfilter.
     *
     * Allocates one zero-filled "long term" grid plus BUFFER_COUNT zero-filled
     * input grids of grid_reso * grid_reso bytes each, folds every input grid
     * into the long-term grid with a per-byte maximum, and releases everything.
     *
     * The part of each grid that is a multiple of pixels_per bytes is handed to
     * asm_maxfilter; any leftover bytes are processed by the scalar loop.
     *
     * Returns 0 on success and 1 if any allocation fails. Every buffer that was
     * successfully allocated is freed on both paths.
     */
    int main(int argc, char** argv) {
        enum { BUFFER_COUNT = 60 };

        const int pixels_per = 16;
        const int grid_reso = 256;
        const int grid_size = grid_reso * grid_reso;
        const int remainder = grid_size % pixels_per;
        const int work_count = grid_size - remainder;

        /* NULL-initialised so the cleanup loop can free() every slot safely. */
        unsigned char* up_buffers[BUFFER_COUNT] = { NULL };
        unsigned char* longterm_up;
        int status = 1;
        int u;
        int i;

        /* calloc zero-fills, replacing the original malloc + memset pair. */
        longterm_up = calloc((size_t)grid_size, 1);
        if (longterm_up == NULL) {
            fprintf(stderr, "Failed mallocing\n");
            return 1;
        }

        for (u = 0; u < BUFFER_COUNT; ++u) {
            up_buffers[u] = calloc((size_t)grid_size, 1);

            if (up_buffers[u] == NULL) {
                fprintf(stderr, "Failed mallocing\n");
                goto cleanup;
            }
        }

        for (u = 0; u < BUFFER_COUNT; ++u) {

            asm_maxfilter(up_buffers[u], longterm_up, work_count);

            // non-SIMD version handles the remainder that did not fit in NEON registers
            for (i = work_count; i < grid_size; ++i) {
                if (longterm_up[i] < up_buffers[u][i]) {
                    longterm_up[i] = up_buffers[u][i];
                }
            }
        }

        status = 0;

    cleanup:
        /* free(NULL) is a no-op, so slots that were never allocated are fine. */
        for (u = 0; u < BUFFER_COUNT; ++u) {
            free(up_buffers[u]);
        }

        free(longterm_up);

        return status;
    }

    汇编代码:
    @ ARM NEON version of a max filter. Performs the following operation:
    @
    @ for (int i = 0; i < buf_size; ++i) {
    @ if (buf_b[i] < buf_a[i]) {
    @ buf_b[i] = buf_a[i];
    @ }
    @ }
    @
    @ NOTE(review): unlike the C loop above, the assembly loop tests its exit
    @ condition only after the body has run, so one 16-byte chunk is processed
    @ even when buf_size is 0.

    @ Assemble the following instructions with ARM (32-bit) encodings
    .arm
    .section .text
    @ NOTE(review): on ARM targets GNU as presumably treats this operand as a
    @ power of two (i.e. 16-byte alignment) - confirm against the as manual.
    .align 4
    .globl asm_maxfilter

    @ parameters
    @ r0: buf_a (only read; advanced by 16 bytes per iteration)
    @ r1: buf_b (read and updated in place; advanced by 16 bytes per iteration)
    @ r2: buf_size, multiple of 16
    @
    @ NOTE(review): no ".type asm_maxfilter, %function" directive precedes the
    @ label, so the symbol is not marked as a function - presumably this matters
    @ for ARM/Thumb interworking when the caller is Thumb code; confirm.
    asm_maxfilter:

    @ Store register states in stack. They must be restored before returning
    @ NOTE(review): r4 is saved and restored but never used in this routine;
    @ presumably it is pushed only to keep the stack 8-byte aligned - confirm.
    push { r4, lr }

    @ Reset counter
    @ r3 counts the number of bytes processed so far
    mov r3, #0

    filter_loop:

    @ Load 16 bytes into vectors
    @ q0 <- 16 bytes from buf_a, q1 <- 16 bytes from buf_b
    vld1.u8 {q0}, [r0]
    vld1.u8 {q1}, [r1]

    @ Find greater values in each vector
    @ q2 becomes a per-byte mask: all ones where q0 > q1 (unsigned), else zero
    vcgt.u8 q2, q0, q1

    @ Bitselect the greater value into q2
    @ Where a mask bit in q2 is set the bit comes from q0, otherwise from q1;
    @ the result overwrites the mask in q2
    vbsl.u8 q2, q0, q1

    @ Store the larger value in output buffer
    vst1.u8 {q2}, [r1]

    @ Increment counter by 16
    add r3, r3, #16

    @ Increment pointers
    add r0, r0, #16
    add r1, r1, #16

    @ Check if loop is done
    @ Signed comparison: keep looping while r3 (bytes done) < r2 (buf_size)
    cmp r3, r2
    blt filter_loop

    @ Restore registers to their original state
    pop { r4, lr }

    @ lr register contains return address
    bx lr

    .end

    代码编译使用:
    gcc -Wall -Wpedantic -O0 -g -march=armv8-a -mfloat-abi=hard -mtune=cortex-a72 -mfpu=neon -c -o main.o main.c
    gcc -Wall -Wpedantic -O0 -g -march=armv8-a -mfloat-abi=hard -mtune=cortex-a72 -mfpu=neon -o neon_test ./main.o ./asm_test.s

    根据 ARM 文档的说明,如果处理器需要在 Thumb/ARM 状态之间切换,程序应该使用 BLX 或 BX 指令来执行分支:

    https://developer.arm.com/docs/100076/0100/instruction-set-overview/overview-of-aarch32-state/changing-between-a32-and-t32-instruction-set-states

    引用:
    To direct armasm to generate A32 or T32 instruction encodings, you must set the assembler mode using an ARM or THUMB directive. Assembly code using CODE32 and CODE16 directives can still be assembled, but Arm recommends you use the ARM and THUMB directives for new code.

    These directives do not change the instruction set state of the processor. To do this, you must use an appropriate instruction, for example BX or BLX to change between A32 and T32 states when performing a branch.

    反汇编我的程序后,我发现这种模式切换没有完成。这是程序员必须在他们的汇编代码中自己做的事情(即使分支发生在 C 代码中),还是编译器/汇编器应该处理这个?

    我也尝试指定 __attribute__((target("arm")))在C文件函数声明中,即:
    __attribute__((target("arm")))
    void asm_maxfilter(unsigned char* upbuffer, unsigned char* longterm_buffer, int grid_size);

    然而,这似乎并没有带来任何改变。如果用 -marm 编译,或者使用未以 --with-mode=thumb 配置的 GCC,一切都正常。

    最佳答案

    正如 old_timer 在评论中所建议的,问题在于汇编源代码没有在标签之前加上 .type asm_maxfilter, %function。可以正常工作的汇编代码开头如下:

    .arm
    .section .text
    .align 4
    .globl asm_maxfilter

    .type asm_maxfilter, %function
    asm_maxfilter:

    @ Store register states in stack. They must be restored before returning
    push { r4, lr }

    @ Reset counter
    mov r3, #0
    ...

    如果情况相反(ARM 模式的程序调用 Thumb 函数),则应该使用 .thumb_func,而不是 .type asm_maxfilter, %function。

    根据 Jester 的回复,我注意到 C 代码的目标文件中确实有一个 R_ARM_THM_CALL 重定位段,但如果不使用 .type 宏,分支指令就不会被替换为 bx 指令。

    如果使用 __attribute__((target("arm"))) 在 C 文件中直接实现 ARM 函数,而不借助外部汇编文件,即:
    #include <stdio.h>
    #include <stdlib.h>

    /*
     * Multiplies `a` by 6, then by 5, and writes the result followed by a
     * newline to stderr. The target("arm") attribute is the one under
     * discussion in the surrounding text.
     */
    __attribute__((target("arm")))
    void foo(int a) {
        const int times_six = a * 6;
        const int result = times_six * 5;

        fprintf(stderr, "%d\n", result);
    }

    /*
     * Converts the first command-line argument to an int with atoi and passes
     * it to foo.
     *
     * Returns 0 on success, or 1 (after printing a usage line to stderr) when
     * no argument was supplied.
     */
    int main(int argc, char** argv) {
        int asd;

        /* argv[1] is only a valid string when an argument was actually given. */
        if (argc < 2) {
            fprintf(stderr, "usage: %s <integer>\n", argc > 0 ? argv[0] : "prog");
            return 1;
        }

        asd = atoi(argv[1]);
        foo(asd);
        return 0;
    }

    那么可以观察到生成的二进制文件中正确使用了 blx 指令。只有当汇编代码位于不经过编译器的单独文件中时,才会出现我遇到的这个问题。

    关于c - 在thumb模式下编译时的GCC arm指令模式,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/60618779/

    43 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com