gpt4 book ai didi

c++ - 寻找搬迁的起源

转载 作者:塔克拉玛干 更新时间:2023-11-02 23:14:20 25 4
gpt4 key购买 nike

使用Ulrich Drepper的relinfo.pl脚本,可以轻松计算DSO的重定位次数,但不适用于.o文件。

假设我有一个大型共享库,但对它的重定位数量不满意。有没有办法找出它们的来源(符号,或者至少是.o),以检查它们是否属于易于修复的类型(例如:const char * str = "Hello World";'-> const char str[] = "Hello World";)?

最佳答案

简短答案:请改为使用 objdump readelf

长答案:让我们看一个实际的示例案例example.c:

#include <stdio.h>

static const char global1[] = "static const char []";
static const char *global2 = "static const char *";
static const char *const global3 = "static const char *const";
const char global4[] = "const char []";
const char *global5 = "const char *";
const char *const global6 = "const char *const";
char global7[] = "char []";
char *global8 = "char *";
char *const global9 = "char *const";

int main(void)
{
static const char local1[] = "static const char []";
static const char *local2 = "static const char *";
static const char *const local3 = "static const char *const";
const char local4[] = "const char []";
const char *local5 = "const char *";
const char *const local6 = "const char *const";
char local7[] = "char []";
char *local8 = "char *";
char *const local9 = "char *const";

printf("Global:\n");
printf("\t%s\n", global1);
printf("\t%s\n", global2);
printf("\t%s\n", global3);
printf("\t%s\n", global4);
printf("\t%s\n", global5);
printf("\t%s\n", global6);
printf("\t%s\n", global7);
printf("\t%s\n", global8);
printf("\t%s\n", global9);
printf("\n");
printf("Local:\n");
printf("\t%s\n", local1);
printf("\t%s\n", local2);
printf("\t%s\n", local3);
printf("\t%s\n", local4);
printf("\t%s\n", local5);
printf("\t%s\n", local6);
printf("\t%s\n", local7);
printf("\t%s\n", local8);
printf("\t%s\n", local9);

return 0;
}

您可以使用例如将其编译为目标文件
gcc -W -Wall -c example.c

并使用
gcc -W -Wall example.c -o example

您可以使用 objdump -tr example.o转储(非动态)目标文件的符号和重定位信息,或使用 objdump -TtRr example转储可执行文件(和动态目标文件)的符号和重定位信息。使用
objdump -t example.o

在x86-64上我得到
example.o:     file format elf64-x86-64

SYMBOL TABLE:
0000000000000000 l df *ABS* 0000000000000000 example.c
0000000000000000 l d .text 0000000000000000 .text
0000000000000000 l d .data 0000000000000000 .data
0000000000000000 l d .bss 0000000000000000 .bss
0000000000000000 l d .rodata 0000000000000000 .rodata
0000000000000000 l O .rodata 0000000000000015 global1
0000000000000000 l O .data 0000000000000008 global2
0000000000000048 l O .rodata 0000000000000008 global3
00000000000000c0 l O .rodata 0000000000000015 local1.2053
0000000000000020 l O .data 0000000000000008 local2.2054
00000000000000d8 l O .rodata 0000000000000008 local3.2055
0000000000000000 l d .note.GNU-stack 0000000000000000 .note.GNU-stack
0000000000000000 l d .eh_frame 0000000000000000 .eh_frame
0000000000000000 l d .comment 0000000000000000 .comment
0000000000000050 g O .rodata 000000000000000e global4
0000000000000008 g O .data 0000000000000008 global5
0000000000000080 g O .rodata 0000000000000008 global6
0000000000000010 g O .data 0000000000000008 global7
0000000000000018 g O .data 0000000000000008 global8
00000000000000a0 g O .rodata 0000000000000008 global9
0000000000000000 g F .text 000000000000027a main
0000000000000000 *UND* 0000000000000000 puts
0000000000000000 *UND* 0000000000000000 printf
0000000000000000 *UND* 0000000000000000 putchar
0000000000000000 *UND* 0000000000000000 __stack_chk_fail

输出在 man 1 objdump标题下的 -t 中描述。请注意,第二个“列”实际上是固定宽度的:七个字符宽,描述了对象的类型。第三列是节名称,用于未定义的 *UND*,用于代码的 .text,用于只读(不可变)数据的 .rodata,用于初始化的可变数据的 .data和用于未初始化的可变数据的 .bss,等等。

从上面的符号表中我们可以看到 local4local5local6local7local8local9变量实际上根本没有在符号表中获得条目。这是因为它们对于 main()是本地的。它们引用的字符串的内容存储在 .data.rodata中(或动态构建),具体取决于编译器的最佳效果。

接下来让我们看看重定位记录。使用
objdump -r example.o

我懂了
example.o:     file format elf64-x86-64

RELOCATION RECORDS FOR [.text]:
OFFSET TYPE VALUE
0000000000000037 R_X86_64_32S .rodata+0x000000000000005e
0000000000000040 R_X86_64_32S .rodata+0x000000000000006b
0000000000000059 R_X86_64_32S .rodata+0x0000000000000088
0000000000000062 R_X86_64_32S .rodata+0x000000000000008f
0000000000000067 R_X86_64_32 .rodata+0x00000000000000a8
000000000000006c R_X86_64_PC32 puts-0x0000000000000004
0000000000000071 R_X86_64_32 .rodata+0x00000000000000b0
0000000000000076 R_X86_64_32 .rodata
0000000000000083 R_X86_64_PC32 printf-0x0000000000000004
000000000000008a R_X86_64_PC32 .data-0x0000000000000004
000000000000008f R_X86_64_32 .rodata+0x00000000000000b0
000000000000009f R_X86_64_PC32 printf-0x0000000000000004
00000000000000a6 R_X86_64_PC32 .rodata+0x0000000000000044
00000000000000ab R_X86_64_32 .rodata+0x00000000000000b0
00000000000000bb R_X86_64_PC32 printf-0x0000000000000004
00000000000000c0 R_X86_64_32 .rodata+0x00000000000000b0
00000000000000c5 R_X86_64_32 global4
00000000000000d2 R_X86_64_PC32 printf-0x0000000000000004
00000000000000d9 R_X86_64_PC32 global5-0x0000000000000004
00000000000000de R_X86_64_32 .rodata+0x00000000000000b0
00000000000000ee R_X86_64_PC32 printf-0x0000000000000004
00000000000000f5 R_X86_64_PC32 global6-0x0000000000000004
00000000000000fa R_X86_64_32 .rodata+0x00000000000000b0
000000000000010a R_X86_64_PC32 printf-0x0000000000000004
000000000000010f R_X86_64_32 .rodata+0x00000000000000b0
0000000000000114 R_X86_64_32 global7
0000000000000121 R_X86_64_PC32 printf-0x0000000000000004
0000000000000128 R_X86_64_PC32 global8-0x0000000000000004
000000000000012d R_X86_64_32 .rodata+0x00000000000000b0
000000000000013d R_X86_64_PC32 printf-0x0000000000000004
0000000000000144 R_X86_64_PC32 global9-0x0000000000000004
0000000000000149 R_X86_64_32 .rodata+0x00000000000000b0
0000000000000159 R_X86_64_PC32 printf-0x0000000000000004
0000000000000163 R_X86_64_PC32 putchar-0x0000000000000004
0000000000000168 R_X86_64_32 .rodata+0x00000000000000b5
000000000000016d R_X86_64_PC32 puts-0x0000000000000004
0000000000000172 R_X86_64_32 .rodata+0x00000000000000b0
0000000000000177 R_X86_64_32 .rodata+0x00000000000000c0
0000000000000184 R_X86_64_PC32 printf-0x0000000000000004
000000000000018b R_X86_64_PC32 .data+0x000000000000001c
0000000000000190 R_X86_64_32 .rodata+0x00000000000000b0
00000000000001a0 R_X86_64_PC32 printf-0x0000000000000004
00000000000001a7 R_X86_64_PC32 .rodata+0x00000000000000d4
00000000000001ac R_X86_64_32 .rodata+0x00000000000000b0
00000000000001bc R_X86_64_PC32 printf-0x0000000000000004
00000000000001c1 R_X86_64_32 .rodata+0x00000000000000b0
00000000000001d6 R_X86_64_PC32 printf-0x0000000000000004
00000000000001db R_X86_64_32 .rodata+0x00000000000000b0
00000000000001ef R_X86_64_PC32 printf-0x0000000000000004
00000000000001f4 R_X86_64_32 .rodata+0x00000000000000b0
0000000000000209 R_X86_64_PC32 printf-0x0000000000000004
000000000000020e R_X86_64_32 .rodata+0x00000000000000b0
0000000000000223 R_X86_64_PC32 printf-0x0000000000000004
0000000000000228 R_X86_64_32 .rodata+0x00000000000000b0
000000000000023d R_X86_64_PC32 printf-0x0000000000000004
0000000000000242 R_X86_64_32 .rodata+0x00000000000000b0
0000000000000257 R_X86_64_PC32 printf-0x0000000000000004
0000000000000271 R_X86_64_PC32 __stack_chk_fail-0x0000000000000004


RELOCATION RECORDS FOR [.data]:
OFFSET TYPE VALUE
0000000000000000 R_X86_64_64 .rodata+0x0000000000000015
0000000000000008 R_X86_64_64 .rodata+0x000000000000005e
0000000000000018 R_X86_64_64 .rodata+0x0000000000000088
0000000000000020 R_X86_64_64 .rodata+0x0000000000000015


RELOCATION RECORDS FOR [.rodata]:
OFFSET TYPE VALUE
0000000000000048 R_X86_64_64 .rodata+0x0000000000000029
0000000000000080 R_X86_64_64 .rodata+0x000000000000006b
00000000000000a0 R_X86_64_64 .rodata+0x000000000000008f
00000000000000d8 R_X86_64_64 .rodata+0x0000000000000029


RELOCATION RECORDS FOR [.eh_frame]:
OFFSET TYPE VALUE
0000000000000020 R_X86_64_PC32 .text

重定位记录按其重定位所在的部分分组。由于字符串内容位于 .data.rodata部分中,因此我们可以限制自己查看 VALUE.data.rodata开头的重定位。 (可变字符串,如 char global7[] = "char []";,存储在 .data中,不可变的字符串和字符串文字存储在 .rodata中。)

如果要在启用了调试符号的情况下编译代码,则可以更轻松地确定使用哪个变量来引用哪个字符串,但是我可能只是查看每个重定位值(目标)上的实际内容,以查看对哪个重定位的引用。不可变的字符串需要修复。

命令组合
objdump -r example.o | awk '($3 ~ /^\..*\+/) { t = $3; sub(/\+/, " ", t); n[t]++ } END { for (r in n) printf "%d %s\n", n[r], r }' | sort -g

将输出每个目标的重定位数量,然后是目标部分,然后是该部分中的目标偏移量,并与在重定位中出现次数最多的目标排序。也就是说,上面输出的最后几行是您需要集中精力的。对我来说
1 .rodata
1 .rodata 0x0000000000000044
1 .rodata 0x00000000000000a8
1 .rodata 0x00000000000000b5
1 .rodata 0x00000000000000c0
1 .rodata 0x00000000000000d4
2 .rodata 0x0000000000000015
2 .rodata 0x0000000000000029
2 .rodata 0x000000000000005e
2 .rodata 0x000000000000006b
2 .rodata 0x0000000000000088
2 .rodata 0x000000000000008f
18 .rodata 0x00000000000000b0

如果我添加优化( gcc -W -Wall -O3 -fomit-frame-pointer -c example.c),结果是
1 .rodata 0x0000000000000020
1 .rodata 0x0000000000000040
1 .rodata.str1.1
1 .rodata.str1.1 0x0000000000000058
2 .rodata.str1.1 0x000000000000000d
2 .rodata.str1.1 0x0000000000000021
2 .rodata.str1.1 0x000000000000005f
2 .rodata.str1.1 0x000000000000006c
3 .rodata.str1.1 0x000000000000003a
3 .rodata.str1.1 0x000000000000004c
18 .rodata.str1.1 0x0000000000000008

这表明编译器选项的确发挥了很大作用,但是一个目标始终被使用了18次: .rodata偏移量 0xb0节(如果在编译时启用了优化,则 .rodata.str1.1偏移量 0x8)。

那是字符串“\\ t%s \ n”。

将原始程序修改为
    char *local8 = "char *";
char *const local9 = "char *const";

const char *const fmt = "\t%s\n";

printf("Global:\n");
printf(fmt, global1);
printf(fmt, global2);

依此类推,用不可变的字符串指针 fmt替换格式字符串,完全消除了这18个重定位。 (当然,您也可以使用等效的 const char fmt[] = "\t%s\n";。)

以上分析表明,至少对于GCC-4.6.3,大多数可避免的重定位是由(重复使用)字符串文字引起的。用const chars( const char fmt[] = "\t%s\n";)数组或const chars( const char *const fmt = "\t%s\n";)的const指针替换它们-两种情况都将内容置于 .rodata节(只读),并且指针/数组引用本身也是不可变的-似乎是有效和安全的策略对我来说。

此外,将字符串文字转换为不可变的字符串指针或char数组完全是源代码级的任务。也就是说,如果使用上述方法转换所有字符串文字,则可以消除每个字符串文字至少一个重定位。

实际上,在这里,我看不到对象级分析如何对您有很大帮助。当然,它将告诉您所做的修改是否减少了所需的重定位次数。

上面的 awk节可以扩展为一个函数,该函数输出具有正偏移量的动态引用的字符串常量:
#!/bin/bash
if [ $# -ne 1 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
exec >&2
echo ""
echo "Usage: %s [ -h | --help ]"
echo " %s object.o"
echo ""
exit 1
fi

export LANG=C LC_ALL=C

objdump -wr "$1" | awk '
BEGIN {
RS = "[\t\v\f ]*[\r\n][\t\n\v\f\r ]*"
FS = "[\t\v\f ]+"
}

$1 ~ /^[0-9A-Fa-f]+/ {
n[$3]++
}

END {
for (s in n)
printf "%d %s\n", n[s], s
}
' | sort -g | gawk -v filename="$1" '
BEGIN {
RS = "[\t\v\f ]*[\r\n][\t\n\v\f\r ]*"
FS = "[\t\v\f ]+"

cmd = "objdump --file-offsets -ws " filename
while ((cmd | getline) > 0)
if ($3 == "section") {
s = $4
sub(/:$/, "", s)
o = $NF
sub(/\)$/, "", o)
start[s] = strtonum(o)
}
close(cmd)
}

{
if ($2 ~ /\..*\+/) {
s = $2
o = $2
sub(/\+.*$/, "", s)
sub(/^[^\+]*\+/, "", o)
o = strtonum(o) + start[s]
cmd = "dd if=\"" filename "\" of=/dev/stdout bs=1 skip=" o " count=256"
OLDRS = RS
RS = "\0"
cmd | getline hex
close(cmd)
RS = OLDRS
gsub(/\\/, "\\\\", hex)
gsub(/\t/, "\\t", hex)
gsub(/\n/, "\\n", hex)
gsub(/\r/, "\\r", hex)
gsub(/\"/, "\\\"", hex)
if (hex ~ /[\x00-\x1F\x7F-\x9F\xFE\xFF]/ || length(hex) < 1)
printf "%s\n", $0
else
printf "%s = \"%s\"\n", $0, hex
} else
print $0
}
'

这有点粗糙,只是拍打在一起,所以我不知道它的便携性。在我的机器上,它似乎确实找到了我尝试过的几个测试用例的字符串文字。您可能应该重写它以满足您自己的需求。甚至使用带有ELF支持的实际编程语言直接检查目标文件。

对于上面显示的示例程序(在修改之前,我建议减少重新定位的次数),未经优化而编译,上面的脚本产生输出
1 .data+0x000000000000001c = ""
1 .data-0x0000000000000004
1 .rodata
1 .rodata+0x0000000000000044 = ""
1 .rodata+0x00000000000000a8 = "Global:"
1 .rodata+0x00000000000000b5 = "Local:"
1 .rodata+0x00000000000000c0 = "static const char []"
1 .rodata+0x00000000000000d4 = ""
1 .text
1 __stack_chk_fail-0x0000000000000004
1 format
1 global4
1 global5-0x0000000000000004
1 global6-0x0000000000000004
1 global7
1 global8-0x0000000000000004
1 global9-0x0000000000000004
1 putchar-0x0000000000000004
2 .rodata+0x0000000000000015 = "static const char *"
2 .rodata+0x0000000000000029 = "static const char *const"
2 .rodata+0x000000000000005e = "const char *"
2 .rodata+0x000000000000006b = "const char *const"
2 .rodata+0x0000000000000088 = "char *"
2 .rodata+0x000000000000008f = "char *const"
2 puts-0x0000000000000004
18 .rodata+0x00000000000000b0 = "\t%s\n"
18 printf-0x0000000000000004

最后,您可能会注意到,使用指向 printf()的函数指针而不是直接调用 printf()将减少示例代码中的另外18个重定位,但是我认为这是一个错误。

对于代码,您需要重定位,因为间接函数调用(通过函数指针进行的调用)比直接调用要慢得多。简而言之,这些重定位使函数和子例程调用快得多,因此您绝对希望保留这些重定位。

抱歉,答案很长;希望您觉得这个有帮助。有什么问题吗

关于c++ - 寻找搬迁的起源,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/19067010/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com