gpt4 book ai didi

c - 内核 block 设备 - 使用自旋锁陷入死锁

转载 作者:太空狗 更新时间:2023-10-29 15:39:53 24 4
gpt4 key购买 nike

我刚刚实现了一个虚拟 block 设备,我想用它来探索 linux 内核如何处理 block 设备。

我的设备只是一个内存区域,分为两个 512 字节的扇区。

我正在使用全局结构来存储设备信息:

typedef struct
{
uint32_t hard_sector_size; // Size of a device sector
uint32_t sector_number; // Number of sector on device
uint32_t size; // Total size of virtual device in bytes
uint8_t* data; // Device memory buffer
spinlock_t device_lock; // Device structure access spinlock
struct request_queue *queue; // Device request queue
struct gendisk *gendisk; // Device "disk" representation
int major; // Device major number attributed by kernel
int minor; // Device minor number fixed at initialization
uint32_t r_users; // Number of read access
uint32_t w_users; // Number of write access
}blk_mod_t;

blk_mod_t self;

[...]

现在我想保护这个结构免受并发访问。为此,我使用了 device_lock 字段。
如果锁被占用,结构正在更新,所以我应该等到完成。
如果没有被占用,我可以访问结构字段。

现在我只在以下三个函数中使用这个自旋锁

static int block_mod_open(struct block_device *bdev, fmode_t mode)
{
access_mode_t access_mode;
DEBUG("Entering open function\n");

if((mode & FMODE_READ) && (mode & FMODE_WRITE))
{
NOTICE("Oppened in read/write mode\n");
mode = ACCESS_RW;
}
else if(mode & FMODE_READ)
{
NOTICE("Oppened in read only mode\n");
mode = ACCESS_RONLY;
}
else if(mode & FMODE_WRITE)
{
NOTICE("Oppened in write only mode\n");
mode = ACCESS_WONLY;
}

DEBUG("<--\n");
spin_lock(&self.device_lock);

if(ACCESS_RW == access_mode)
{
self.r_users++;
self.w_users++;
}
else if(ACCESS_RONLY == access_mode)
{
self.r_users++;
}
else
{
self.w_users++;
}
NOTICE("Read access: %d\tWrite access: %d\n", self.r_users, self.w_users);

DEBUG("-->\n");
spin_unlock(&self.device_lock);

DEBUG("Exiting open function\n");
return 0;
}

static void block_mod_release(struct gendisk *disk, fmode_t mode)
{
access_mode_t access_mode;
DEBUG("Entering release function\n");

if((mode & FMODE_READ) && (mode & FMODE_WRITE))
{
NOTICE("Closed read/write mode\n");
mode = ACCESS_RW;
}
else if(mode & FMODE_READ)
{
NOTICE("Closed read only mode\n");
mode = ACCESS_RONLY;
}
else if(mode & FMODE_WRITE)
{
NOTICE("Closed write only mode\n");
mode = ACCESS_WONLY;
}

DEBUG("<--\n");
spin_lock(&self.device_lock);

if(ACCESS_RW == access_mode)
{
self.r_users--;
self.w_users--;
}
else if(ACCESS_RONLY == access_mode)
{
self.r_users--;
}
else
{
self.w_users--;
}
NOTICE("Read access: %d\tWrite access: %d\n", self.r_users, self.w_users);

DEBUG("-->\n");
spin_unlock(&self.device_lock);

DEBUG("Exiting release function\n");
return;
}

static void block_mod_transfer(unsigned long sector, unsigned long nsect, char *buffer, int write)
{
unsigned long offset = sector*KERNEL_SECTOR_SIZE;
unsigned long nbytes = nsect*KERNEL_SECTOR_SIZE;
DEBUG("Entering transfer function\n");
DEBUG("<--\n");
spin_lock(&self.device_lock);

if((offset + nbytes) > self.size) {
WARNING("Beyond-end write (%ld %ld)\n", offset, nbytes);
spin_unlock(&self.device_lock);
return;
}
if(write)
{
NOTICE("Writing to device\n");
memcpy(self.data + offset, buffer, nbytes);
}
else
{
NOTICE("Reading from device\n");
memcpy(buffer, self.data + offset, nbytes);
}

DEBUG("-->\n");
spin_unlock(&self.device_lock);
DEBUG("Exiting transfer function\n");
}

我正在用以下函数处理请求

static void block_mod_request(struct request_queue *queue)
{
DEBUG("Entering request function\n");
struct request *request;

while(NULL != (request = blk_fetch_request(queue)))
{
blk_mod_t *self = request->rq_disk->private_data;
// Check if request is a filesystem request (i.e. moves block of data)
if(REQ_TYPE_FS != request->cmd_type)
{
// Close request with unsuccessful status
WARNING("Skip non-fs request\n");
__blk_end_request_cur(request, -EIO);
continue;
}
// Treat request
block_mod_transfer(blk_rq_pos(request), blk_rq_cur_sectors(request), bio_data(request->bio), rq_data_dir(request));
// Close request with successful status
__blk_end_request_cur(request, 0);
}

DEBUG("Exiting request function\n");
return;
}

当我加载模块时,没有什么特别的事情发生。但是,如果我尝试读取它,我就会陷入僵局,因为我的系统不再响应,我必须重新启动。

这是输出:

root@PC325:~# echo 8 > /proc/sys/kernel/printk
root@PC325:~# insmod block_mod.ko
[ 64.546791] block_mod: loading out-of-tree module taints kernel.
[ 64.548197] block_mod: module license '(c) Test license' taints kernel.
[ 64.549951] Disabling lock debugging due to kernel taint
[ 64.552816] Inserting module 'blk_mod_test'
[ 64.554085] Got major number : '254'
[ 64.554940] Data allocated (size = 1024)
[ 64.557378] Request queue initialized
[ 64.558178] Sent hard sector size to request queue
[ 64.559188] Gendisk allocated
[ 64.559817] Gendisk filled
[ 64.560416] Gendisk capacity set
[ 64.563285] Gendisk added
root@PC325:~# [ 64.565280] Entering open function
[ 64.566035] Oppened in read only mode
[ 64.566773] <--
[ 64.567138] Read access: 1 Write access: 0
[ 64.567977] -->
[ 64.568342] Exiting open function
[ 64.571080] Entering release function
[ 64.571855] Closed read only mode
[ 64.572531] <--
[ 64.572924] Read access: 0 Write access: 0
[ 64.573749] -->
[ 64.574116] Exiting release function
root@PC325:~# cat /dev/blkmodtest
[ 78.488228] Entering open function
[ 78.488988] Oppened in read only mode
[ 78.489733] <--
[ 78.490100] Read access: 1 Write access: 0
[ 78.490925] -->
[ 78.491290] Exiting open function
[ 78.492026] Entering request function
[ 78.492743] Entering transfer function
[ 78.493469] <--
-------------- DEADLOCK HERE --------------

更新:添加初始化和退出函数

static int __init block_mod_init(void)
{
char* message = "abcdefghijklmnopqrstuvwxyz";
int i;

INFO("Inserting module '%s'\n", MODULE_NAME);

// Initialize driver data structure
memset(&self, 0, sizeof(blk_mod_t));
self.hard_sector_size = DEVICE_HARD_SECTOR_SIZE;
self.sector_number = DEVICE_SECTOR_NUMBER;
self.size = self.sector_number*self.hard_sector_size;
self.minor = 1;

// Get a major number from kernel
if(0 > (self.major = register_blkdev(self.major, MODULE_NAME)))
{
ERROR("Unable to get major number for '%s'\n", MODULE_NAME);
unregister_blkdev(self.major, MODULE_NAME);
return -1;
}
DEBUG("Got major number : '%d'\n", self.major);

// Allocate data space
if(NULL == (self.data = vmalloc(self.size)))
{
ERROR("Unable to allocate memory for '%s'\n", MODULE_NAME);
unregister_blkdev(self.major, MODULE_NAME);
return -2;
}
for(i=0;i<self.size;i++)
{
self.data[i] = message[i%strlen(message)];
}
spin_lock_init(&self.device_lock);
DEBUG("Data allocated (size = %d)\n", self.size);

// Allocate the request queue
if(NULL == (self.queue = blk_init_queue(block_mod_request, &self.device_lock)))
{
ERROR("Unable to initialize request queue for '%s'\n", MODULE_NAME);
vfree(self.data);
unregister_blkdev(self.major, MODULE_NAME);
return -3;
}
DEBUG("Request queue initialized\n");

// Send device hard sector size to request queue
blk_queue_logical_block_size(self.queue, self.hard_sector_size);
self.queue->queuedata = &self;
DEBUG("Sent hard sector size to request queue\n");

// Allocate the gendisk structure
if(NULL == (self.gendisk = alloc_disk(self.minor)))
{
ERROR("Unable to initialize gendisk for '%s'\n", MODULE_NAME);
blk_cleanup_queue(self.queue);
vfree(self.data);
unregister_blkdev(self.major, MODULE_NAME);
return -4;
}
DEBUG("Gendisk allocated\n");

// Fill gendisk structure
self.gendisk->major = self.major;
self.gendisk->first_minor = self.minor;
self.gendisk->fops = &self_ops;
self.gendisk->queue = self.queue;
self.gendisk->private_data = &self;
snprintf(self.gendisk->disk_name, 32, "blkmodtest");
DEBUG("Gendisk filled\n");
set_capacity(self.gendisk, self.sector_number*(self.hard_sector_size/KERNEL_SECTOR_SIZE));
DEBUG("Gendisk capacity set\n");
add_disk(self.gendisk);
DEBUG("Gendisk added\n");

return 0;
}

static void __exit block_mod_cleanup(void)
{
del_gendisk(self.gendisk);
put_disk(self.gendisk);
blk_cleanup_queue(self.queue);
vfree(self.data);
unregister_blkdev(self.major, MODULE_NAME);

INFO("Removing module '%s'\n", MODULE_NAME);
return;
}

更新:添加宏和枚举定义

#define MODULE_NAME                    "blk_mod_test"
#define KERNEL_SECTOR_SIZE 512
#define DEVICE_HARD_SECTOR_SIZE 512
#define DEVICE_SECTOR_NUMBER 2

typedef enum
{
ACCESS_RONLY = 0,
ACCESS_WONLY = 1,
ACCESS_RW = 2,
}access_mode_t;

我不明白的是,在我尝试将其锁定到 block_mod_transfer 之前,自旋锁已被释放(在 block_mod_open 的末尾)。

所以我不明白为什么当自旋锁似乎可以被占用时内核会陷入死锁。

为什么我在这种情况下会陷入僵局?我做错了什么?

最佳答案

感谢@CraigEstey 的评论,我终于发现问题出在请求队列使用与我的设备结构相同的自旋锁初始化。

// Allocate the request queue
if(NULL == (self.queue = blk_init_queue(block_mod_request, &self.device_lock)))
{
ERROR("Unable to initialize request queue for '%s'\n", MODULE_NAME);
vfree(self.data);
unregister_blkdev(self.major, MODULE_NAME);
return -3;
}

因此,当请求队列的回调函数(即 block_mod_request)被调用时,自旋锁已经被持有,我陷入了死锁。

关于c - 内核 block 设备 - 使用自旋锁陷入死锁,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/49899888/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com