gpt4 book ai didi

当进程死亡时创建新进程

转载 作者:行者123 更新时间:2023-11-30 19:31:59 26 4
gpt4 key购买 nike

我需要同时运行 n 个进程,并且这些进程仅在指定时间内处于活动状态;每当有进程死亡时,我都想创建一个新进程。我想出了一种可行的方法,但我想知道这是否是正确的做法。

/* Number of child processes to keep running at the same time */
#define n 5

/* Fork one child; the child does its work and exits with status 0,
 * while the parent returns to the caller immediately. */
void newProcess(){
    if (fork() != 0)
        return;     /* parent (or failed fork): nothing more to do here */

    // work
    exit(0);
}

/* Keep n children running: start children until n are counted, then
 * block in wait() and start a replacement each time one terminates. */
int main(){
    int running = 0;
    for (;;) {
        if (running >= n) {
            /* Pool is full: wait for any child to finish, freeing a slot */
            wait(NULL);
            running--;
            continue;
        }
        newProcess();
        running++;
    }
    return 0;
}

最佳答案

这里有一些可运行的代码,它大体上是你的代码的可工作版本,并添加了日志记录等功能。有了日志记录,就能更容易地看出它是否正常工作。

#include "stderr.h"
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

/* Maximum number of child processes tracked (and kept running) at once */
#define N 5

/*
 * Fork one worker child.
 *
 * Child:  simulates work by sleeping for a random interval of less than
 *         three seconds, then exits.  About 9% of the time it exits with
 *         a random status in the range 0..254 instead of 0 (test
 *         scaffolding).
 * Parent: returns the child's PID, or -1 with errno set by fork() if the
 *         fork failed.  As further test scaffolding, about 9% of the
 *         successfully forked children are sent a random signal in the
 *         range 1..8 and reported to the caller as a failure (-1 with
 *         errno set to EAGAIN), so the caller never records their PID.
 */
static int newProcess(void)
{
    int pid = fork();
    if (pid == 0)
    {
        // work - this process goes to sleep on the job!
        struct timespec nap = { .tv_sec = rand() % 3, .tv_nsec = rand() % 1000000000 };
        // Announce the work before doing it.  %.9ld zero-pads the
        // nanoseconds, so 82518051 ns prints as ".082518051" rather
        // than ". 82518051".
        err_remark("About to do %ld.%.9ld seconds work\n", (long)nap.tv_sec, nap.tv_nsec);
        nanosleep(&nap, 0);
        int rc = 0;
        if (rand() % 100 > 90)
            rc = rand() % 255;
        err_remark("Work completed - exit status %d\n", rc);
        exit(rc);
    }
    if (pid > 0 && rand() % 100 > 90)
    {
        // Fake a failure: kill the new child and report it as a failed fork
        kill(pid, rand() % 8 + 1);
        errno = EAGAIN;
        pid = -1;
    }
    return pid;
}

/*
 * Probe a PID with signal 0 (no signal is delivered; only the existence
 * and permission checks are made) and log the outcome.
 *
 * Returns the result of kill(): 0 if the probe succeeded, non-zero otherwise.
 * NOTE(review): the probe also succeeds for a child that has terminated but
 * has not yet been waited for, so "OK" means "PID still exists".
 */
static inline int check_child(int pid)
{
#undef SIGNONE
    enum { SIGNONE = 0 };
    const int probe = kill(pid, SIGNONE);
    const char *state = "Dead";
    if (probe == 0)
        state = "OK";
    err_remark("PID %d - %s\n", pid, state);
    return probe;
}

/*
 * Remove dead children from the PID list.
 *
 * npids - in/out: number of valid entries in pids[]; reduced by one for
 *         every entry that check_child() reports as dead.
 * pids  - list of child PIDs; order is not preserved (a dead entry is
 *         overwritten by the last valid entry).
 */
static void process_check(int *npids, int pids[])
{
    err_remark("Checking PID list\n");
    for (int i = 0; i < *npids; )
    {
        if (check_child(pids[i]) == 0)
        {
            i++;
            continue;
        }
        // Child is presumably dead!  Move the last entry into this slot and
        // shrink the list.  The index is not advanced, so the moved entry is
        // checked next; if the dead entry was the last one, the loop
        // condition ends the scan instead of re-testing a stale slot.
        pids[i] = pids[--*npids];
    }
}

/*
 * Keep up to N worker children running at all times.
 *
 * While fewer than N children are recorded, fork another one; once the
 * table is full, block in wait() and free the slot of whichever recorded
 * child terminated.  Children that newProcess() reported as failures are
 * not in the table, so their deaths are logged as "Unknown PID".
 * The loop never terminates; the program runs until it is killed.
 */
int main(int argc, char **argv)
{
    err_setarg0(argv[0]);
    if (argc != 1)
        err_usage(" # No arguments allowed");
    int child_pid[N] = { 0 };
    srand(time(0));
    err_setlogopts(ERR_PID | ERR_MICRO);

    int processCount = 0;
    while (1)
    {
        if (processCount < N)
        {
            int pid = newProcess();
            if (pid > 0)
            {
                child_pid[processCount++] = pid;
                err_remark("PID %d started\n", pid);
            }
            else
            {
                assert(pid == -1);
                int errnum = errno;     // save before logging can change errno
                err_sysrem("Failed to fork");
                process_check(&processCount, child_pid);
                if (errnum == EAGAIN)
                {
                    struct timespec nap = { .tv_sec = 0, .tv_nsec = (rand() % 10 + 1) * 1000000 };
                    nanosleep(&nap, 0); // sleep 1-10 milliseconds (could be too big).
                }
            }
        }
        else
        {
            int status;
            int corpse = wait(&status);
            if (corpse > 0)
            {
                int known_pid = 0;
                for (int i = 0; i < processCount; i++)
                {
                    if (child_pid[i] == corpse)
                    {
                        err_remark("PID %d exit status 0x%.4X\n", corpse, status);
                        known_pid = 1;
                        // Fill the vacated slot with the last entry
                        child_pid[i] = child_pid[--processCount];
                        break;
                    }
                }
                if (!known_pid)
                    err_remark("Unknown PID %d exit status 0x%.4X - ignored\n", corpse, status);
            }
            else if (errno == ECHILD)
            {
                // No children left to wait for although the table is full
                // (for example, SIGCHLD is being ignored so children are
                // reaped automatically).  Every recorded PID is stale; clear
                // the table so new children are started instead of calling
                // wait() in a tight loop forever.
                err_remark("wait() failed: %s - clearing PID list\n", strerror(ECHILD));
                processCount = 0;
            }
            // Any other failure (e.g. EINTR) is simply retried by the loop.
        }
    }
    return 0;
}

头文件 stderr.h 及其配套源代码 stderr.c 可以在 GitHub 上我的 SOQ 存储库的 libsoq 目录中找到。它提供了方便且可配置的日志记录服务。

请注意,测试代码会伪造一些故障,并杀死一些子进程等。在用于生产的代码中,您需要删除这些部分。您很可能会保留大部分日志记录,特别是当子进程通常一次工作很多秒、而不是像本例中那样只工作几秒的时候。

一些示例输出:

$ ./mon61
mon61: 2017-12-01 09:48:03.636756 - pid=74353: PID 74354 started
mon61: 2017-12-01 09:48:03.637568 - pid=74353: PID 74355 started
mon61: 2017-12-01 09:48:03.637724 - pid=74353: PID 74356 started
mon61: 2017-12-01 09:48:03.637885 - pid=74353: PID 74357 started
mon61: 2017-12-01 09:48:03.638048 - pid=74353: PID 74358 started
mon61: 2017-12-01 09:48:03.747398 - pid=74356: About to do 0.108225168 seconds work
mon61: 2017-12-01 09:48:03.748152 - pid=74356: Work completed - exit status 0
mon61: 2017-12-01 09:48:03.748791 - pid=74353: PID 74356 exit status 0x0000
mon61: 2017-12-01 09:48:03.749046 - pid=74353: PID 74359 started
mon61: 2017-12-01 09:48:04.032219 - pid=74359: About to do 0.281932019 seconds work
mon61: 2017-12-01 09:48:04.032971 - pid=74359: Work completed - exit status 0
mon61: 2017-12-01 09:48:04.033747 - pid=74353: PID 74359 exit status 0x0000
mon61: 2017-12-01 09:48:04.034007 - pid=74353: PID 74361 started
mon61: 2017-12-01 09:48:04.602396 - pid=74355: About to do 0.964067315 seconds work
mon61: 2017-12-01 09:48:04.602951 - pid=74355: Work completed - exit status 0
mon61: 2017-12-01 09:48:04.603596 - pid=74353: PID 74355 exit status 0x0000
mon61: 2017-12-01 09:48:04.603855 - pid=74353: PID 74362 started
mon61: 2017-12-01 09:48:05.419466 - pid=74358: About to do 1.780199743 seconds work
mon61: 2017-12-01 09:48:05.420017 - pid=74358: Work completed - exit status 0
mon61: 2017-12-01 09:48:05.420669 - pid=74353: PID 74358 exit status 0x0000
mon61: 2017-12-01 09:48:05.420923 - pid=74353: PID 74363 started
mon61: 2017-12-01 09:48:05.453929 - pid=74357: About to do 1.814728145 seconds work
mon61: 2017-12-01 09:48:05.454320 - pid=74357: Work completed - exit status 0
mon61: 2017-12-01 09:48:05.454753 - pid=74353: PID 74357 exit status 0x0000
mon61: 2017-12-01 09:48:05.454939 - pid=74353: PID 74364 started
mon61: 2017-12-01 09:48:05.512822 - pid=74354: About to do 1.875699204 seconds work
mon61: 2017-12-01 09:48:05.514094 - pid=74354: Work completed - exit status 0
mon61: 2017-12-01 09:48:05.514349 - pid=74353: PID 74354 exit status 0x0000
mon61: 2017-12-01 09:48:05.514658 - pid=74353: PID 74365 started
mon61: 2017-12-01 09:48:06.004823 - pid=74362: About to do 1.399425773 seconds work
mon61: 2017-12-01 09:48:06.005581 - pid=74362: Work completed - exit status 0
mon61: 2017-12-01 09:48:06.006237 - pid=74353: PID 74362 exit status 0x0000
mon61: 2017-12-01 09:48:06.006523 - pid=74353: Failed to forkerror (35) Resource temporarily unavailable
mon61: 2017-12-01 09:48:06.006562 - pid=74353: Checking PID list
mon61: 2017-12-01 09:48:06.006570 - pid=74353: PID 74364 - OK
mon61: 2017-12-01 09:48:06.006576 - pid=74353: PID 74361 - OK
mon61: 2017-12-01 09:48:06.006582 - pid=74353: PID 74365 - OK
mon61: 2017-12-01 09:48:06.006588 - pid=74353: PID 74363 - OK
mon61: 2017-12-01 09:48:06.013228 - pid=74353: PID 74368 started
mon61: 2017-12-01 09:48:06.013267 - pid=74353: Unknown PID 74366 exit status 0x0006 - ignored
mon61: 2017-12-01 09:48:06.117089 - pid=74361: About to do 2. 82518051 seconds work
mon61: 2017-12-01 09:48:06.117618 - pid=74361: Work completed - exit status 0
mon61: 2017-12-01 09:48:06.118206 - pid=74353: PID 74361 exit status 0x0000
mon61: 2017-12-01 09:48:06.118486 - pid=74353: PID 74369 started
mon61: 2017-12-01 09:48:06.537455 - pid=74363: About to do 1.115086289 seconds work
mon61: 2017-12-01 09:48:06.537967 - pid=74363: Work completed - exit status 0
mon61: 2017-12-01 09:48:06.538610 - pid=74353: PID 74363 exit status 0x0000
mon61: 2017-12-01 09:48:06.538880 - pid=74353: PID 74371 started
mon61: 2017-12-01 09:48:06.682182 - pid=74371: About to do 0.141922802 seconds work
mon61: 2017-12-01 09:48:06.682945 - pid=74371: Work completed - exit status 0
mon61: 2017-12-01 09:48:06.683733 - pid=74353: PID 74371 exit status 0x0000
mon61: 2017-12-01 09:48:06.684007 - pid=74353: PID 74372 started
mon61: 2017-12-01 09:48:06.975561 - pid=74364: About to do 1.519976923 seconds work
mon61: 2017-12-01 09:48:06.976341 - pid=74364: Work completed - exit status 188
mon61: 2017-12-01 09:48:06.976942 - pid=74353: PID 74364 exit status 0xBC00
mon61: 2017-12-01 09:48:06.977225 - pid=74353: PID 74373 started
mon61: 2017-12-01 09:48:07.436814 - pid=74368: About to do 1.422967208 seconds work
mon61: 2017-12-01 09:48:07.437600 - pid=74368: Work completed - exit status 0
mon61: 2017-12-01 09:48:07.438230 - pid=74353: PID 74368 exit status 0x0000

仔细检查日志表明存在一些“未知 PID”的死亡消息。这表明在管理 PID 数组(也称为“修复错误”)方面还有一些工作要做。 我稍后可能会研究一下。

查看代码可知,这些都是“预期的”。创建子进程后,大约有 9% 的概率会立即用信号将其杀死(所有这些子进程的状态值都在 0x0001 到 0x0008 之间,表示被信号杀死)。对于这些进程,newProcess() 的返回值为 -1,这使得其 PID 不会进入已知子进程列表,因此当该子进程死亡、其状态信息被收集时,PID 就是“未知”的。换句话说,这是“预期”行为。更好的记录方式是:把返回给调用方的 PID 取负,并写一条消息说明该子进程已创建但被信号杀死(可能发生在子进程有机会做任何事情——例如报告自己正在运行——之前)。

与“About to do N.xxxxxxxxx seconds work”消息相关的 err_remark() 调用应该位于 nanosleep() 调用之前,而不是之后(上面的示例输出是在该消息于睡眠之后才写出的情况下生成的)。它还应该使用 %.9ld 而不是 %9ld 来格式化秒的小数部分,否则会出现示例输出中“2. 82518051”这样带空格的结果。两者都很容易修复。

除了让子进程做真正的工作、而不只是在“工作”时睡眠之外,还有很多可以改进的地方。该代码可以处理一些信号(例如,收到中断信号时检查子进程,收到挂起信号时重新读取配置文件,收到终止信号时杀死子进程并退出)。它可以写入日志文件而不是标准错误。它可以作为守护进程运行,而不是在前台运行。它可以提供选项来控制日志文件所在的目录,也许还可以控制日志文件名。它可以检测其日志文件是否(以及何时)被删除,并创建一个新的日志文件。等等。

但这给了你一些可以玩的东西。

关于当进程死亡时创建新进程,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/47595743/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com