gpt4 book ai didi

linux - 如何测试/验证 vmalloc 保护页在 Linux 中是否正常工作

转载 作者:行者123 更新时间:2023-12-03 10:00:08 27 4
gpt4 key购买 nike

我正在研究 Linux 中的堆栈保护。我发现 Linux 内核 VMAP_STACK 配置参数使用保护页面机制以及 vmalloc() 来提供堆栈保护。
我正在尝试找到一种方法来检查此保护页在 Linux 内核中的工作方式。我用谷歌搜索并检查了内核代码,但没有找到代码。

进一步的问题是如何验证受保护的堆栈。
我有一个内核模块,用来让进程的内核堆栈下溢/溢出,就像这样:

/*
 * Test helper that deliberately writes outside the current task's kernel
 * stack so that the resulting fault can be observed. The out-of-bounds
 * write is intentional; this is fault-injection code, not production code.
 */
static void shoot_kernel_stack(void)
{
/* Address returned by task_stack_page() for the running task (presumably the lowest address of its stack -- confirm). */
unsigned char *ptr = task_stack_page(current);
unsigned char *tmp = NULL;


/* Target address: THREAD_SIZE + PAGE_SIZE bytes past ptr, i.e. beyond the THREAD_SIZE bytes of the stack itself. */
tmp = ptr + THREAD_SIZE + PAGE_SIZE + 0;
// tmp -= 0x100;
memset(tmp, 0xB4, 0x10); // Underrun: fill 0x10 bytes at the out-of-range address with 0xB4
}

我真的遇到了如下所示的内核 panic ,

[ 8006.358354] BUG: stack guard page was hit at 00000000e8dc2d98 (stack is 00000000cff0f921..00000000653b24a9)
[ 8006.361276] kernel stack overflow (page fault): 0000 [#1] SMP PTI

这是验证保护页的正确方法吗?

最佳答案

VMAP_STACK Linux 功能用于将线程的内核堆栈映射到 VMA。通过虚拟映射堆栈,底层物理页面不需要是连续的。可以通过添加保护页来检测跨页溢出。由于 VMA 后面跟着一个保护页(除非在分配时传递了 VM_NO_GUARD 标志),在这些区域中分配的堆栈可以受益于堆栈溢出检测。

分配

线程堆栈是在线程创建时使用 kernel/fork.c 中的 alloc_thread_stack_node() 分配的。当 VMAP_STACK 被激活时,堆栈会被缓存,因为根据源码中的注释:

vmalloc() is a bit slow, and calling vfree() enough times will force a TLB flush. Try to minimize the number of calls by caching stacks.

内核堆栈大小为 THREAD_SIZE(在 x86_64 平台上等于 4 页)。在线程创建时调用的分配的源代码是:

static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
{
#ifdef CONFIG_VMAP_STACK
void *stack;
int i;

[...] // <----- Part which gets a previously cached stack. If no stack in cache
// the following is run to allocate a brand new stack:

/*
* Allocated stacks are cached and later reused by new threads,
* so memcg accounting is performed manually on assigning/releasing
* stacks to tasks. Drop __GFP_ACCOUNT.
*/
stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
VMALLOC_START, VMALLOC_END,
THREADINFO_GFP & ~__GFP_ACCOUNT,
PAGE_KERNEL,
0, node, __builtin_return_address(0));
[...]

__vmalloc_node_range() 在 mm/vmalloc.c 中定义。它会调用 __get_vm_area_node()。由于调用后者时没有传入 VM_NO_GUARD 标志,因此在分配区域的末尾添加了一个额外的页面。这就是 VMA 的保护页:

/*
 * Reserve a virtual area of at least @size bytes within [@start, @end) and
 * return the vm_struct describing it, or NULL on failure.
 *
 * Unless VM_NO_GUARD is set in @flags, the requested size is grown by one
 * extra PAGE_SIZE before the range is allocated; that extra page is the
 * guard page discussed in the surrounding text.
 */
static struct vm_struct *__get_vm_area_node(unsigned long size,
unsigned long align, unsigned long flags, unsigned long start,
unsigned long end, int node, gfp_t gfp_mask, const void *caller)
{
struct vmap_area *va;
struct vm_struct *area;

/* Triggers BUG if in_interrupt() reports true. */
BUG_ON(in_interrupt());
/* Round the request up to a page multiple; a zero result is rejected. */
size = PAGE_ALIGN(size);
if (unlikely(!size))
return NULL;

/* For VM_IOREMAP areas, derive the alignment from the size order, clamped to [PAGE_SHIFT, IOREMAP_MAX_ORDER]. */
if (flags & VM_IOREMAP)
align = 1ul << clamp_t(int, get_count_order_long(size),
PAGE_SHIFT, IOREMAP_MAX_ORDER);

/* Allocate the vm_struct descriptor itself on the requested node. */
area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
if (unlikely(!area))
return NULL;

if (!(flags & VM_NO_GUARD)) // <----- A GUARD PAGE IS ADDED
size += PAGE_SIZE;

/* Reserve the address range (guard page included, if any); release the descriptor if that fails. */
va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
if (IS_ERR(va)) {
kfree(area);
return NULL;
}

/* Bind the descriptor to the reserved range, recording flags and caller. */
setup_vmalloc_vm(area, va, flags, caller);

return area;
}

溢出管理

堆栈溢出管理依赖于体系结构(即源代码位于arch/...)。下面引用的链接提供了一些关于一些依赖于架构的实现的指示。

对于 x86_64 平台,溢出检查是在页面错误中断时完成的,它会触发以下函数调用链:do_page_fault()->__do_page_fault()->do_kern_addr_fault()->bad_area_nosemaphore()->no_context()。这些函数定义在 arch/x86/mm/fault.c 中。在 no_context() 中,有一个部分专门用于 VMAP_STACK 管理,用于检测堆栈下溢/溢出:

/*
 * Excerpt of the fault path reached for faults with no usable context
 * (parts are elided with "[...]").
 *
 * The CONFIG_VMAP_STACK section shown here checks whether the faulting
 * address lies in vmalloc space within one page below tsk->stack or within
 * one page above tsk->stack + THREAD_SIZE. If so, it switches the stack
 * pointer and calls handle_stack_overflow(), which does not return.
 */
static noinline void
no_context(struct pt_regs *regs, unsigned long error_code,
unsigned long address, int signal, int si_code)
{
struct task_struct *tsk = current;
unsigned long flags;
int sig;
[...]
#ifdef CONFIG_VMAP_STACK
/*
* Stack overflow? During boot, we can fault near the initial
* stack in the direct map, but that's not an overflow -- check
* that we're in vmalloc space to avoid this.
*
* Both range tests rely on unsigned wrap-around: each difference is
* below PAGE_SIZE only when the address is inside the single page
* adjacent to the corresponding end of the stack.
*/
if (is_vmalloc_addr((void *)address) &&
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
/* New stack pointer: one pointer-size below the value returned by __this_cpu_ist_top_va(DF) (presumably the double-fault IST stack -- confirm). */
unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
/*
* We're likely to be running with very little stack space
* left. It's plausible that we'd hit this condition but
* double-fault even before we get this far, in which case
* we're fine: the double-fault handler will deal with it.
*
* We don't want to make it all the way into the oops code
* and then double-fault, though, because we're likely to
* break the console driver and lose most of the stack dump.
*/
/*
* Load the replacement stack pointer, then call
* handle_stack_overflow() with the message, regs and faulting
* address supplied through the "D", "S" and "d" constraints.
* The trailing "1: jmp 1b" spins forever should the call return.
*/
asm volatile ("movq %[stack], %%rsp\n\t"
"call handle_stack_overflow\n\t"
"1: jmp 1b"
: ASM_CALL_CONSTRAINT
: "D" ("kernel stack overflow (page fault)"),
"S" (regs), "d" (address),
[stack] "rm" (stack));
unreachable();
}
#endif
[...]
}

在上面的代码中,当检测到堆栈下溢/溢出时,调用 arch/x86/kernel/traps.c 中定义的handle_stack_overflow() 函数:

#ifdef CONFIG_VMAP_STACK
/*
 * Report a hit on a kernel stack guard page and stop; never returns.
 *
 * @message:       text passed on to die() and, as a last resort, to panic()
 * @regs:          register state forwarded to die()
 * @fault_address: address whose access triggered the fault
 *
 * Logs the faulting address together with the bounds of the current task's
 * stack (current->stack .. current->stack + THREAD_SIZE - 1) at KERN_EMERG
 * level.
 *
 * NOTE(review): the values are printed with %p; printk presumably hashes
 * such pointers, which would explain why logged addresses do not look
 * contiguous -- confirm against the printk format documentation.
 */
__visible void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,
unsigned long fault_address)
{
printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n",
(void *)fault_address, current->stack,
(char *)current->stack + THREAD_SIZE - 1);
die(message, regs, 0);

/* Be absolutely certain we don't return. */
panic("%s", message);
}
#endif

问题中指出的示例错误消息“BUG: stack guard page was hit at...”来自上述handle_stack_overflow()函数。

来自您的示例模块

当 VMAP_STACK 被定义时,任务描述符的 stack_vm_area 字段出现并设置为与堆栈关联的 VMA 地址。从那里,可以获取有趣的信息:

/*
 * Fragment that prints information about the current task's kernel stack:
 * the task->stack address, the unsigned long stored at that address, the
 * stack_vm_area fields (only when CONFIG_VMAP_STACK is defined), and the
 * address of a local variable for comparison.
 */
struct task_struct *task;
#ifdef CONFIG_VMAP_STACK
struct vm_struct *vm;
#endif // CONFIG_VMAP_STACK

task = current;

printk("\tKernel stack: 0x%lx\n", (unsigned long)(task->stack));
/* Reads the unsigned long located at task->stack itself. */
printk("\tStack end magic: 0x%lx\n", *(unsigned long *)(task->stack));
#ifdef CONFIG_VMAP_STACK
/* NOTE(review): vm is dereferenced without a NULL check -- confirm stack_vm_area is always set here. */
vm = task->stack_vm_area;
printk("\tstack_vm_area->addr = 0x%lx\n", (unsigned long)(vm->addr));
printk("\tstack_vm_area->nr_pages = %u\n", vm->nr_pages);
printk("\tstack_vm_area->size = %lu\n", vm->size);
#endif // CONFIG_VMAP_STACK
/* Address of the local variable 'task', printed for comparison with the values above. */
printk("\tLocal var in stack: 0x%lx\n", (unsigned long)(&task));

nr_pages 字段是不包含附加保护页的页数。堆栈顶部的最后一个 unsigned long 被设置为 STACK_END_MAGIC,它在 include/uapi/linux/magic.h 中定义为:

#define STACK_END_MAGIC     0x57AC6E9D

引用文献:

关于linux - 如何测试/验证 vmalloc 保护页在 Linux 中是否正常工作,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/65121145/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com