gpt4 book ai didi

c - 如何对 shell 输入进行分词(tokenize)?

转载 作者:太空宇宙 更新时间:2023-11-04 02:36:25 25 4
gpt4 key购买 nike

我已经编写了自己的 shell,可以执行包含参数的简单命令管道:

$ ls | wc -l
84

但是对于 awk 它是行不通的:

$ ls | awk '{print $1}'
awk: cmd. line:1: '{print
awk: cmd. line:1: ^ invalid char ''' in expression

处理命令行的相关代码是:

/*
 * Entry point of the shell.
 *
 * Installs the SIGINT and SIGCHLD handlers, hands the first PATH entry to
 * findless(), and then loops: print a prompt, read one line, run the
 * built-ins ("exit", "cd", "checkEnv") in-process and fork a child for any
 * other command.  A line containing '&' is started as a background job;
 * everything else is waited for in the foreground.
 *
 * Words are split on single spaces only, so quoting is not understood:
 * awk '{print $1}' still reaches exec_arguments() as separate words.
 *
 * Returns 0 once "exit" is entered or stdin reaches end of file.
 */
int main(int argc, char *argv[]) {
    char line[BUFFER_LEN];      /* raw input, tokenised in place by strtok() */
    char linecopy[BUFFER_LEN];  /* second copy, tokenised only in the child  */
    char *params[100];
    char *array[40];
    const size_t params_max = sizeof params / sizeof params[0] - 1;
    const int array_max = (int)(sizeof array / sizeof array[0]) - 1;
    char *token;
    char *less_token;
    char *tokenstr;
    char *search = " ";
    char *p;
    char *pathValue;
    char *path_strdup;
    size_t i;
    size_t length;
    int ki;
    int ret;
    int isBackground = 0;
    int built_in_command = 0;
    int fd[2];
    int status = 0;
    struct passwd *pw;
    struct timeval time_start;
    struct timeval time_end;
    sigset_t my_sig;
    pid_t pid_temp;
    struct sigaction sa, osa;
    struct sigaction less_sa;

    /* argv[0]; written as argc-argc so that argc counts as used. */
    err_setarg0(argv[argc - argc]);
    pid_temp = 0;

    sa.sa_sigaction = sighandler;
    sigemptyset(&sa.sa_mask);   /* sa_mask must not be left indeterminate */
    sa.sa_flags = SA_SIGINFO;
    sigaction(SIGINT, &sa, &osa);

    less_sa.sa_handler = &handle_sigchld;
    sigemptyset(&less_sa.sa_mask);
    less_sa.sa_flags = SA_RESTART | SA_NOCLDSTOP;
    if (sigaction(SIGCHLD, &less_sa, 0) == -1) {
        perror(0);
        exit(1);
    }

    /* Read PATH to find out whether less is installed. */
    ret = 1;
    pathValue = getenv("PATH");
    if (!pathValue) {
        /* Nothing to search: keep the initial ret and do not touch strdup(). */
        printf("'%s' is not set.\n", "PATH");
    } else {
        path_strdup = strdup(pathValue);
        if (path_strdup == NULL) {
            perror("strdup");
        } else {
            less_token = strtok(path_strdup, ":");
            ret = findless(less_token, ret);
            /* From here on the strtok() state points into freed memory, so
             * every later scan has to start with an explicit buffer. */
            free(path_strdup);
        }
    }

    while (1) {
        i = 0;
        printf("$ ");
        fflush(stdout);
        if (!fgets(line, BUFFER_LEN, stdin)) {
            putchar('\n');
            break;
        }

        if (AllWhiteSpace(line)) {
            continue;
        }

        /* fgets() always terminates line, so the copy is terminated too. */
        strncpy(linecopy, line, BUFFER_LEN);

        length = strlen(line);
        if (length > 0 && line[length - 1] == '\n') {
            line[length - 1] = '\0';
        }
        if (strcmp(line, "exit") == 0) {
            break;
        }

        if (StartsWith(line, "cd")) {
            built_in_command = 1;
            /* Start a fresh scan on this line: the first word is the command
             * itself, the second (if any) is the target directory. */
            strtok(line, search);
            tokenstr = strtok(NULL, search);
            if (tokenstr == NULL) {
                /* No argument: change to the home directory. */
                pw = getpwuid(getuid());
                if (pw == NULL || chdir(pw->pw_dir) == -1) {
                    perror("Failed changing to homedirectory\n");
                }
            } else if (chdir(tokenstr) == -1) {
                perror("Failed changing directory\n");
            }
        }

        /* Split on spaces; one slot is kept free for the NULL terminator. */
        token = strtok(line, " ");
        while (token != NULL && i < params_max) {
            params[i++] = token;
            token = strtok(NULL, " ");
        }
        params[i] = NULL;

        if (StartsWith(line, "checkEnv")) {
            built_in_command = 1;
            checkEnv(ret);
        }

        if (0 == built_in_command) {
            /* Not a built-in command, so execute it.  line has already been
             * cut up by strtok(), so the untouched copy is searched for '&'. */
            isBackground = (strchr(linecopy, '&') != NULL);

            if (isBackground == 1) {
                if (pipe(fd) == -1) {
                    perror("Failed creating pipe\n");
                    continue;   /* nothing was started; back to the prompt */
                }
                pid_temp = fork();
            } else {
                gettimeofday(&time_start, NULL);

                if (1 == isSignal) {
                    /* Hold SIGCHLD back until the foreground wait is over. */
                    sigemptyset(&my_sig);
                    sigaddset(&my_sig, SIGCHLD);
                    sigprocmask(SIG_BLOCK, &my_sig, NULL);
                }

                pid_temp = fork();
                foreground = pid_temp;  /* pid of the foreground process */
            }

            if (pid_temp < 0) {
                perror("fork");
            } else if (pid_temp == 0) {
                /* Child process */
                if (1 == isBackground) {
                    dup2(fd[STDIN_FILENO], STDIN_FILENO);
                    close(fd[0]);
                    close(fd[1]);
                }

                length = strlen(linecopy);
                if (length > 0 && linecopy[length - 1] == '\n') {
                    linecopy[length - 1] = '\0';
                }

                /* array[0] stays NULL; the words start at index 1. */
                ki = 1;
                array[0] = NULL;
                p = strtok(linecopy, " ");
                while (p != NULL && ki < array_max) {
                    array[ki++] = p;
                    p = strtok(NULL, " ");
                }
                array[ki] = NULL;
                exec_arguments(ki, array);
                corpse_collector();
                /* NOTE(review): the child is not terminated here, so if
                 * exec_arguments() returns it carries on into the code below
                 * and back to the prompt - confirm this is intended. */
            }

            if (0 == isBackground) {
                /* Foreground process: wait for it, unless fork() failed. */
                if (pid_temp != -1) {
                    waitpid(foreground, &status, 0);
                }

                /* The end time is recorded but not printed. */
                gettimeofday(&time_end, NULL);

                if (1 == isSignal) {
                    if (sigprocmask(SIG_UNBLOCK, &my_sig, NULL) == -1) {
                        perror("sigprocmask");
                    }
                    Janitor(SIGCHLD);
                }
            } else {
                close(fd[0]);
                close(fd[1]);
            }
        }

        built_in_command = 0;           /* Reset */
        memset(line, 0, sizeof line);   /* Reset */
    }
    return (0);
}

完整的程序可以在这里(here)查看。

怎样才能让这条管道命令正常工作?调试时的输出如下:

$ ./a.out 
$ ls | wc -l
Before exec_arguments: (5) {(null)} {ls} {|} {wc} {-l}
84
27363: child 27364 status 0x0000
$ ls | awk '{print $1}'
Before exec_arguments: (6) {(null)} {ls} {|} {awk} {'{print} {$1}'}
awk: cmd. line:1: '{print
awk: cmd. line:1: ^ invalid char ''' in expression
27374: child 27375 status 0x0100
$

“解决方案”

我的“解决方案”是在出现 awk 时强制循环。也许它并不理想,但它会在管道中启用 awk:

$ ls | awk '{print $1}'
alias.h
a.out
Boot1.asm
Boot1.bin
boot.asm
boot.bin
bootl.asm
bootload.asm
bootload.bin
bootloader
bootloader.asm

我用来做分词(tokenize)的新代码是:

        /*
         * Fragment: split `input` into params[1..], leaving params[0] NULL,
         * then hand the result to exec_arguments().
         *
         * Workaround for awk: the token that follows the word "awk" is read
         * up to the next single quote instead of the next space, so a quoted
         * awk program stays in one argument.
         * NOTE(review): only the word "awk" triggers this; quoted arguments
         * of any other command are still split on spaces.
         */
        token = strtok(input, " ");
i = 1;
/* j is not used anywhere in this fragment. */
j=1;
params[0] = NULL;
while (token != NULL)
{
/* The previous token was "awk": this one is the quote-delimited program. */
if(awk == 1) {
/* Debug output only; s is not stored in params[].
 * NOTE(review): concat() presumably allocates - s is never freed here. */
s = concat("awk ", token);
printf("s is %s", s);
params[i++] = token;
/* Go back to splitting on spaces for the rest of the line. */
token = strtok(NULL, " ");
awk = 0;
continue;

}
if (strcmp(token, "awk") == 0) {
params[i++] = token;
awk = 1;
/* Fetch the next token with "'" as the delimiter rather than " ". */
token = strtok(NULL, "\'");
continue;
}

/* Ordinary word. */
params[i++] = token;
token = strtok(NULL, " ");
}
params[i] = NULL;
/* Debug output: number of slots used, counting the NULL in params[0]. */
printf("ki %d", i);
/*dump_argv("Before exec_arguments", i, params);*/
exec_arguments(i, params);
corpse_collector();
free(input);

更新

根据 tripleee 的回答,我按照那段伪代码实现了解析并去除引号。下面是我目前写出的代码,它可以编译,并且对部分输入有效。我把伪代码里的 push 理解为栈操作,希望没有理解错;因此我在项目里加了一个存放 char * 的栈,看起来可以工作。

/*
 * Collects the words of a command line into params[], starting at index i.
 *
 * input  - the command line that the current strtok() scan was started on
 * token  - the first token of that scan; the following ones are fetched
 *          here with strtok(NULL, ...)
 * params - receives the words, terminated by a NULL entry
 * i      - index of the first free slot in params[]
 *
 * A token that starts with a single quote is replaced by the text that
 * subString() copies out of input (from just after the quote), and the
 * delimiter switches from ' ' to '\''.  Double quotes are not implemented:
 * such tokens are stored unchanged, like ordinary words.
 *
 * Returns the index of the terminating NULL entry.
 *
 * The quoted text lives in a static buffer, so it stays valid after the
 * function returns but is overwritten by the next quoted token or call.
 */
int handleToken(char input[BUFFER_LEN], char *token, char *params[100], int i) {
    /* static: params[] still points at this buffer once we have returned. */
    static char dest[BUFFER_LEN];
    /* strtok() needs a NUL-terminated delimiter string, not a lone char. */
    char separator[2] = " ";
    int state = 0;      /* 0 = regular word, 1 = after an opening quote */
    int pos = 0;
    char *ptr2;

    /* Slot 99 is kept free for the NULL terminator. */
    while (token != NULL && i < 99) {
        if (state == 0) {
            if (1 == StartsWith(token, "'")) {
                state = 1;
                separator[0] = '\'';
                /* Offset of the quoted token inside the full line. */
                ptr2 = strstr(input, token);
                if (ptr2 != NULL) {
                    pos = (int)(ptr2 - input);
                }
                if (subString(input, pos + 1, strlen(input) - pos - 2, dest)) {
                    params[i++] = dest;
                    token = strtok(NULL, separator);
                    continue;
                }
            }
            /* Ordinary word, or a double-quoted one, which is not parsed. */
            params[i++] = token;
            token = strtok(NULL, separator);
        } else {
            /* Text that follows the quoted part. */
            push(token);
            params[i++] = token;
            token = strtok(NULL, separator);
            state = 0;
        }
    }
    params[i] = NULL;
    return i;
}
/* double-quoted is similar but more complex */

测试

$ echo 'foo bar'
Before exec_arguments: (3) {(null)} {echo} {foo bar}
foo bar
2901: child 2922 status 0x0000
Execution time 1.872 ms

但这还行不通:

$ echo 'a b' | awk '{print $1}'
Before exec_arguments: (3) {(null)} {echo} {a b' | awk '{print $1}}
a b' | awk '{print $1}
2901: child 2993 status 0x0000
Execution time 0.734 ms

最佳答案

你的 shell 应该在解析后去除引号。脚本周围的引号不是 Awk 语言的一部分;它们的目的是保护 awk 脚本不被 shell 以任何方式解析。正确的最终结果是

/* argv as awk should receive it: the shell has already removed the quotes. */
char *cmd[] = {
    "/usr/bin/awk",
    "{ print $1 }",
    NULL
};

完整的 shell 解析器需要处理递归结构,但带引号的字符串只需要对代码进行少量修改。基本上,在伪代码中

while token:
if state == regular:
if token.startswith("'"):
state := single_quoted_string
redo
elsif token.startswith("\""):
state := double_quoted_string
redo
# else
push parsed, token
token := next_token
elsif state == single_quoted_string:
end_quote := indexof("'")
push parsed, substr(token+1, end_quote-1) # omit quotes
token := end_quote + 1
state := regular
else:
# double-quoted is similar but more complex

关于c - 如何标记 shell 输入?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/36618834/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com