gpt4 book ai didi

multithreading - 这是glibc/pthread中的错误吗?

转载 作者:行者123 更新时间:2023-12-03 12:59:04 24 4
gpt4 key购买 nike

我正在使用健壮(robust)的互斥锁和条件变量。在大多数情况下它工作正常,但偶尔会遇到死锁。
我无法将其简化为一个可重现的小示例,并且我认为这很可能是我的代码中的一个问题,但是,我注意到了一些看起来可疑的东西:

当代码死锁时,pthread_cond_broadcast中有一个线程:

#0  __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
#1 0x00007f4ab2892970 in pthread_cond_broadcast@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S:133

另一个线程位于pthread_mutex_lock中,它等待的是与该条件变量配合使用的互斥锁:
#0  __lll_robust_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevelrobustlock.S:85
#1 0x00007f4ab288e7d7 in __pthread_mutex_lock_full (mutex=0x7f4a9858b128) at ../nptl/pthread_mutex_lock.c:256

如您所见,pthread_mutex_lock使用lowlevelrobustlock,而pthread_cond_broadcast使用lowlevellock。条件变量内部是否可能以某种方式使用了非健壮的互斥锁?

我使用互斥锁来保护共享内存,共享它的进程之一有可能被杀死。
那么,也许发生死锁的原因是进程被杀死时位于pthread_cond_broadcast中,而现在另一个进程无法广播,因为被杀死的进程仍然拥有互斥体?毕竟,类似的情况就是为什么我首先开始使用健壮的互斥锁。

PS:进程在临界区内被终止的情况已经得到处理,健壮的互斥锁在这种情况下工作良好。在所有死锁中,我看到的都是pthread_cond_broadcast为当前活动函数的情况。

PPS:对于互斥锁,有pthread_mutexattr_setrobust,但是我找不到类似pthread_condattr_setrobust的东西。是否存在?

最佳答案

编辑:

此“错误”已有人报告过(原文中的“here”是指向该报告的链接)。在此特定用例中,这只是条件变量的未定义行为。不存在健壮的条件变量,因此不能在基于共享内存的IPC中使用它们。线程取消可能会使条件变量处于不一致状态。

以前的答案如下:

我也有同样的问题。这是在pthread_cond_broadcast中导致死锁的示例代码:

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <pthread.h>

#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>

// Boolean constants used for the int flags in this file and for the
// endless `while (TRUE)` loop in main().
#define TRUE 1
#define FALSE 0

// Header stored in each shared-memory object; init_channel() maps
// sizeof(channel_hdr_t) bytes of the object at offset 0 and every process
// attached to the channel works on this same structure.
typedef struct {
// Condition variable broadcast by the signalling side and waited on by the
// receiving side (see main()); initialised as PTHREAD_PROCESS_SHARED.
pthread_cond_t cond;
// Mutex passed to pthread_cond_wait() together with `cond`; initialised as
// process-shared, robust and priority-inheriting in init_channel().
pthread_mutex_t mtx;
// Wake-up flag: set to 1 before broadcasting, waited on while it is 0, and
// reset to 0 by the waiter once it has been observed.
int predicate;
} channel_hdr_t;

// Per-process handle for one channel, filled in by init_channel().
typedef struct {
// File descriptor returned by shm_open() for the channel's shared-memory object.
int fd;
// Address at which the channel header is mmap()ed in this process.
channel_hdr_t *hdr;
} channel_t;

/* Write the one-line command-line synopsis of this program to stdout. */
void printUsage() {
    fputs("usage: shm_comm_test2 channel_name1 channel_name2\n", stdout);
}

/*
 * Lock `mutex` and translate the pthread status into this program's error
 * codes. A diagnostic line is printed to stdout for every non-zero status.
 *
 * If the previous owner died while holding the lock (EOWNERDEAD), the mutex
 * state is marked consistent so that the lock can be used normally again.
 *
 * Returns 0 when the calling thread now holds the mutex. Otherwise the lock
 * is not held as a result of this call and one of these codes is returned:
 *   -12  pthread_mutex_lock() reported EINVAL
 *   -13  pthread_mutex_lock() reported EAGAIN
 *   -14  pthread_mutex_lock() reported EDEADLK
 *   -15  EOWNERDEAD, but the mutex could not be made consistent
 *   -18  any other status
 */
int robust_mutex_lock(pthread_mutex_t *mutex) {
    int lock_status = pthread_mutex_lock(mutex);
    int acquired = FALSE;
    int err = -18;

    switch (lock_status) {
    case 0:
        acquired = TRUE;
        break;
    case EINVAL:
        printf("**** EINVAL ****\n");
        err = -12;
        break;
    case EAGAIN:
        printf("**** EAGAIN ****\n");
        err = -13;
        break;
    case EDEADLK:
        printf("**** EDEADLK ****\n");
        err = -14;
        break;
    case EOWNERDEAD:
        // The previous owner died while holding the mutex. The lock has been
        // handed to us, but its state must be marked consistent before it is
        // unlocked, otherwise the mutex becomes permanently unusable.
        printf("**** EOWNERDEAD ****\n");

        // Any non-zero result is a failure, not only EINVAL.
        if (pthread_mutex_consistent(mutex) != 0) {
            printf("**** EOWNERDEAD, EINVAL ****\n");
            // We still own the lock at this point; release it so that an
            // error return never leaves the caller unknowingly holding it.
            pthread_mutex_unlock(mutex);
            err = -15;
            break;
        }
        acquired = TRUE;
        break;
    default:
        // Any other status (for example ENOTRECOVERABLE).
        printf("**** OTHER ****\n");
        err = -18;
        break;
    }

    return acquired ? 0 : err;
}

int init_channel(char *shm_name, channel_t *out) {
int initialize = FALSE;

int shm_fd = shm_open (shm_name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
if (shm_fd < 0) {
if (errno == EEXIST) {
// open again, do not initialize
shm_fd = shm_open (shm_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
if (shm_fd < 0) {
printf( "ERROR: could not create %s, errno: %d\n", shm_name, errno );
return 1;
}
}
else {
printf( "ERROR: could not create %s, errno: %d\n", shm_name, errno );
return 2;
}
}
else {
// the shm object was created, so initialize it
initialize = TRUE;

printf("created shm object %s\n", shm_name);
if (ftruncate (shm_fd, sizeof(channel_hdr_t)) != 0)
{
printf( "ERROR: could not ftruncate %s, errno: %d\n", shm_name, errno );
close (shm_fd);
shm_unlink (shm_name);
return 3;
}
}

void *ptr_shm_hdr = mmap (NULL, sizeof(channel_hdr_t), PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);

if (ptr_shm_hdr == MAP_FAILED)
{
printf( "ERROR: could not mmap %s, errno: %d\n", shm_name, errno );
close (shm_fd);
shm_unlink (shm_name);
return 4;
}

channel_hdr_t *shm_hdr = ptr_shm_hdr;

if (initialize) {
// set mutex shared between processes
pthread_mutexattr_t mutex_attr;
pthread_mutexattr_init(&mutex_attr);
pthread_mutexattr_setpshared (&mutex_attr, PTHREAD_PROCESS_SHARED);
pthread_mutexattr_setrobust (&mutex_attr, PTHREAD_MUTEX_ROBUST);
pthread_mutexattr_setprotocol(&mutex_attr, PTHREAD_PRIO_INHERIT);

pthread_mutex_init (&shm_hdr->mtx, &mutex_attr);

// set condition shared between processes
pthread_condattr_t cond_attr;
pthread_condattr_init(&cond_attr);
pthread_condattr_setpshared (&cond_attr, PTHREAD_PROCESS_SHARED);
pthread_cond_init (&shm_hdr->cond, &cond_attr);
}

shm_hdr->predicate = 0;
out->fd = shm_fd;
out->hdr = shm_hdr;

return 0;
}

/*
 * Ping-pong test over two shared-memory channels.
 *
 * Usage: <prog> channel_name1 channel_name2
 *
 * In an endless loop the program signals the first channel (sets its
 * predicate and broadcasts) and then waits for the second channel's
 * predicate to be set by a peer started with the channel names swapped.
 * A progress line is printed every 100000 iterations.
 *
 * Exit status: 0 for a usage error, 1/2 when the first/second channel cannot
 * be initialised, a negative robust_mutex_lock() code when a lock cannot be
 * taken, 3 when waiting on the condition variable fails.
 *
 * NOTE: killing a peer while it is inside a pthread_cond_* call can leave
 * the process-shared condition variable in an inconsistent state; the calls
 * marked "deadlock here" may then block forever. Robust mutexes do not
 * protect against this because there are no robust condition variables.
 */
int main(int argc, char **argv) {
    if (argc != 3) {
        printUsage();
        return 0;
    }

    char *shm_1_name = argv[1];
    char *shm_2_name = argv[2];

    channel_t ch_1;
    if (init_channel(shm_1_name, &ch_1) != 0) {
        return 1;
    }

    channel_t ch_2;
    if (init_channel(shm_2_name, &ch_2) != 0) {
        munmap(ch_1.hdr, sizeof(channel_hdr_t));
        close(ch_1.fd);
        return 2;
    }

    int status = 0;
    int counter = 0;
    int counter2 = 0;
    while (TRUE) {
        ++counter;
        if (counter == 100000) {
            printf("alive %d\n", counter2);
            ++counter2;
            counter = 0;
        }

        // Signal the peer through the first channel.
        status = robust_mutex_lock(&ch_1.hdr->mtx);
        if (status != 0) {
            break;
        }
        ch_1.hdr->predicate = 1;
        pthread_cond_broadcast(&ch_1.hdr->cond); // deadlock here
        pthread_mutex_unlock(&ch_1.hdr->mtx);

        // Wait for the peer's signal on the second channel.
        status = robust_mutex_lock(&ch_2.hdr->mtx);
        if (status != 0) {
            break;
        }

        int wait_status = 0;
        while (ch_2.hdr->predicate == 0 && wait_status == 0) {
            wait_status = pthread_cond_wait(&ch_2.hdr->cond, &ch_2.hdr->mtx); // deadlock here
            if (wait_status == EOWNERDEAD) {
                // The wait re-acquired the robust mutex after its previous
                // owner died. The state must be marked consistent before the
                // mutex is unlocked, or it becomes permanently unusable.
                wait_status = pthread_mutex_consistent(&ch_2.hdr->mtx);
            }
        }
        if (wait_status != 0) {
            // The wait failed, so the predicate was not observed as set and
            // must not be consumed. The unlock is best effort: it simply
            // fails if the lock is not held.
            printf("ERROR: pthread_cond_wait failed, error: %d\n", wait_status);
            pthread_mutex_unlock(&ch_2.hdr->mtx);
            status = 3;
            break;
        }
        ch_2.hdr->predicate = 0;
        pthread_mutex_unlock(&ch_2.hdr->mtx);
    }

    // Reached only when the loop is left because of an error.
    munmap(ch_1.hdr, sizeof(channel_hdr_t));
    close(ch_1.fd);

    munmap(ch_2.hdr, sizeof(channel_hdr_t));
    close(ch_2.fd);

    return status;
}

重现死锁:
  • 使用args运行程序的第一个实例:channel1 channel2
  • 使用args运行程序的第二个实例:channel2 channel1
  • 使用Ctrl + C中断两个程序
  • 再次运行两个程序

  • 该问题在Ubuntu 16.04中不存在。
    但是,它发生在18.04。

    死锁中的两个程序的回溯:

    第一个程序:
    #0  0x00007f9802d989f3 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x7f98031cd02c)
    at ../sysdeps/unix/sysv/linux/futex-internal.h:88
    #1 __pthread_cond_wait_common (abstime=0x0, mutex=0x7f98031cd030, cond=0x7f98031cd000) at pthread_cond_wait.c:502
    #2 __pthread_cond_wait (cond=0x7f98031cd000, mutex=0x7f98031cd030) at pthread_cond_wait.c:655
    #3 0x00005648bc2af081 in main (argc=<optimized out>, argv=<optimized out>)
    at /home/dseredyn/ws_velma/ws_fabric/src/shm_comm/src/test2.c:198

    第二个程序:
    #0  0x00007f1a3434b724 in futex_wait (private=<optimized out>, expected=3, futex_word=0x7f1a34780010)
    at ../sysdeps/unix/sysv/linux/futex-internal.h:61
    #1 futex_wait_simple (private=<optimized out>, expected=3, futex_word=0x7f1a34780010)
    at ../sysdeps/nptl/futex-internal.h:135
    #2 __condvar_quiesce_and_switch_g1 (private=<optimized out>, g1index=<synthetic pointer>, wseq=<optimized out>,
    cond=0x7f1a34780000) at pthread_cond_common.c:412
    #3 __pthread_cond_broadcast (cond=0x7f1a34780000) at pthread_cond_broadcast.c:73
    #4 0x0000557a978b2043 in main (argc=<optimized out>, argv=<optimized out>)
    at /home/dseredyn/ws_velma/ws_fabric/src/shm_comm/src/test2.c:185

    关于multithreading - 这是glibc/pthread中的错误吗?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/51730660/

    24 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com