gpt4 book ai didi

bison - Lexx 和 Yacc 将 C 语法解析为符号表

转载 作者:行者123 更新时间:2023-12-02 03:28:40 30 4
gpt4 key购买 nike

我正在尝试使用 bison 和 lex 创建一个 c 类型解析器。是的,这是一项学校作业,但我很迷失,而且我在网上完成学校作业,所以我没有得到太多帮助。我需要这个来将信息解析到表中,但我很确定我的 parse.y 文件中有错误,因为它没有将信息加载到表中,它说没有代码。我知道我的 .y 文件缺少一些语法,但我认为其中一些应该按原样加载到符号表中。

运行 make 文件时的输出为:

default:
clear
yacc -d -v --debug parse.y
lex -l scan.l
gcc -o cfc symtab.c lex.yy.c y.tab.c
clean:
$(RM) cfc *.o lex.yy.c parse.tab.c y.tab.h y.output dump.symtab

输出到屏幕:

> yacc -d -v --debug parse.y parse.y:39 parser name defined to default
> :"parse" conflicts: 2 reduce/reduce lex -l scan.l gcc -o cfc symtab.c
> lex.yy.c y.tab.c

通过解析器传递 60.cf:

> semantic error cnt: 0     lines of code: -1

解析器的结果是符号表为空,输出内容如下:

> Starting parse 
> Entering state 0
> Reading a token: Next token is 292 (INT)
> parse error: line 0: Shifting error token, Entering state 1
> Reducing via rule 3 (line 57), error -> block

加载60.cf的示例代码:

int foo()
{
while ( i >= 0 ) {
i = i * 1;
c = c + 1;
}
if ( c >= 0 )
{ ; }
else
{ i = 7;
c = i;
}
return 0;
}

这是 scan.l 文件:

%option yylineno

%{
#ifndef TRUE
#define TRUE 1
#endif

#ifndef FALSE
#define FALSE 0
#endif

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "symtab.h"
#include "y.tab.h"

int badtoken_cnt = 0;
int token_cnt = 0;
int col_cnt = 0;
int lineno = 0;

%}

comment \/\*([^*]|\n)*\*\/
digit [0-9]
ichar [A-Z_a-z]
integer {digit}+
newline \n
strchar ([ ~]|\\n)
identifier {ichar}([0-9]|{ichar})*
whitespace [ \t]+
float ([+-]?{digit}+)?\.{digit}*(e?[+-]?{digit}+)?
chrliteral '([!*]|\\n)'
nullstring \"\"
escquote [^"]*\\\"[^"]*
strliteral \"[^"]*{escquote}*\"
%%

"if" { return IF;}
"then" { return THEN;}
"else" { return ELSE;}
"while" { return WHILE;}
"return" { return RETURN;}
"break" { return GOTO;}
"goto" { return GOTO;}
"read" { return READ;}
"write" { return WRITE;}
"float" { return REAL;}
"int" { return INT;}
"void" { return VOID;}
"char" { return CHAR;}

"=" { return ASSIGN;}
"!=" { return NE;}
"==" { return EQ;}
"<" { return LT;}
"<=" { return LE;}
">" { return GT;}
">=" { return GE;}
"&&" { return AND;}
"||" { return OR;}

"+" { return PLUS;}
"-" { return MINUS;}
"*" { return TIMES;}
"/" { return OVER;}
"%" { return MOD;}

"{" { return LBRACE;}
"}" { return RBRACE;}
"[" { return LBRACK;}
"]" { return RBRACK;}
"(" { return LPAREN;}
")" { return RPAREN;}
";" { return SEMI;}
"," { return COMMA;}

{float} {
yylval.tokname = malloc(sizeof(yytext));
strncpy(yylval.tokname,yytext,yyleng);
printf("yylval: %s\n",yylval.tokname);
insert(yytext, yyleng, REAL_TYPE, lineno);
printf("yytext: %s\n",yytext);
return FLOAT;
}
{integer} {
yylval.tokname = malloc(sizeof(yytext));
printf("yylval: %s\n",yylval.tokname);
strncpy(yylval.tokname,yytext,yyleng);
insert(yytext, yyleng, INT_TYPE, lineno);
printf("yytext: %s\n",yytext);
return INTEGER;
}


{chrliteral} {
yylval.tokname = malloc(sizeof(yytext));
strncpy(yylval.tokname,yytext,yyleng);
printf("yylval: %s\n",yylval.tokname);
insert(yytext, yyleng, -1, lineno);
printf("yytext: %s\n",yytext);
return CHRLIT;
}

{nullstring} {
yylval.tokname = malloc(sizeof(yytext));
strncpy(yylval.tokname,yytext,yyleng);
printf("yylval: %s\n",yylval.tokname);
insert(yytext, yyleng, -1, lineno);
printf("yytext: %s\n",yytext);
return STRLIT;
}

{strliteral} {
yylval.tokname = malloc(sizeof(yytext));
strncpy(yylval.tokname,yytext,yyleng);
printf("yylval: %s\n",yylval.tokname);
insert(yytext, yyleng, STR_TYPE, lineno);
printf("yytext: %s\n",yytext);
return STRLIT;
}

{identifier} {

return IDENT;
}
{newline} { col_cnt = 1; }

{whitespace} { col_cnt+=yyleng; }

{comment} { col_cnt = 0; }

"//" { /* handle C++ style comments */
char c;
do { c = input();
} while (c != '\n');
lineno++;
}

. { return ERROR;}

%%

这是 parse.y 文件,我认为这是错误所在:

%{

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symtab.h"

#define DEBUG 0
#define TRUE 1
#define FALSE 0
#define MAX_MSG_LEN 50
#define YYDEBUG 1

int errcnt = 0;
char errmsg[40];
extern char *yytext;
extern FILE *yyin;
extern FILE *yyout;
extern int yyparse();
extern int lineno;
int yydebug = 1;
int t;
%}

/* no warning for fewer than 1 shift/reduce conflicts and 0 reduce/reduce */
%expect 1
%union { int tokid;
char *tokname;
}

%token <tokname> IDENT NUMBER
%token <tokid> ASSIGN PLUS LBRACE RBRACE LPAREN RPAREN SEMI ERROR FLOAT INTEGER
/* ADDED */
%token <tokid> IF THEN ELSE WHILE RETURN GOTO READ WRITE VOID CHAR
/* ADDED */
%token <tokid> NE EQ LT LE GT GE AND OR MINUS TIMES OVER MOD INT REAL
/* ADDED */
%token <tokid> LBRACK RBRACK COMMA CHRLIT STRLIT
%type <tokid> block stmt_seq stmt decl expr term assignmnt decltype error

%start block

%%

block : LBRACE stmt_seq RBRACE
| LPAREN stmt_seq RPAREN
| error { yyerrok; return 0; }
;

stmt_seq : stmt_seq stmt SEMI
| stmt SEMI
| error { yyerrok; return 0;}
;

stmt : expr
| decl
| assignmnt { $$ = $1; }
| error { yyerrok; return 0;}
;

decl : decltype IDENT {
setType($2,$1);
fprintf(stdout,"set decltype to: %d for %s\n",$$,$2);
}
;

expr : expr PLUS term
{ /* add constraint here */ }

| term { $$ = $1; }
| error { yyerrok; return 0;}
;

assignmnt : IDENT ASSIGN expr
{ /* add constraint here */ }
;

term : NUMBER { $$ = lookupType($1); }

| IDENT { $$ = lookupType($1); }
;

decltype : INTEGER { $$ = INT_TYPE; }
| FLOAT { $$ = REAL_TYPE; }
;

%%

int main( int argc,char *argv[] )
{
strcpy(errmsg,"type error\n");
int i;
if(argc < 2) {
printf("Usage: ./cfc <source filename>\n");
exit(0);
}
FILE *fp = fopen(argv[1],"r");
if(!fp) {
printf("Unable to open file for reading\n");
exit(0);
}
yyin = fp;

fp = fopen("dump.symtab","w");
if(!fp) {
printf("Unable to open file for writing\n");
exit(0);
}

int flag = yyparse();

/* dump symtab for debugging if necessary */
symtab_dump(fp);
lineno--; /* don't count the last newline */
printf("\nsemantic error cnt: %d \tlines of code: %d\n",errcnt,lineno);

/* cleanup */
fclose(fp);
fclose(yyin);

return flag;
}


yywrap()
{
return(1);
}

int yyerror(char * msg)
{ fprintf(stderr,"%s: line %d: \n",msg,lineno);
return 0;
}

如果您需要它,这是我正在使用的符号表:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symtab.h"

/* maximum size of hash table */
#define SIZE 200
#define MAXTOKENLEN 40

/* power of two multiplier in hash function */
#define SHIFT 4

/* the hash function */
static int hash ( char * key )
{ int temp = 0;
int i = 0;
while (key[i] != '\0')
{ temp = ((temp << SHIFT) + key[i]) % SIZE;
++i;
}
return temp;
}

/* a linked list of references (line nos) for each variable */
typedef struct RefListRec {
int lineno;
struct RefListRec * next;
/* ADDED */
int type;
} * RefList;


/* hash entry holds variable name and its reference list */
typedef struct HashRec {
char st_name[MAXTOKENLEN];
int st_size;
RefList lines;
int st_value;
/* ADDED */
int st_type;
struct HashRec * next;
} * Node;

/* the hash table */
static Node hashTable[SIZE];

/* insert an entry with its line number - if entry
* already exists just add its reference line no.
*/
void insert( char * name, int len, int type, int lineno )
{
/* ADDED */
/*int len = strlen(name);*/
int h = hash(name);
Node l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->st_name) != 0))
l = l->next;
if (l == NULL) /* variable not yet in table */
{ l = (Node) malloc(sizeof(struct HashRec));
strncpy(l->st_name, name, len);
/* ADDED */
l->st_type = type;
l->lines = (RefList) malloc(sizeof(struct RefListRec));
l->lines->lineno = lineno;
l->lines->next = NULL;
l->next = hashTable[h];
hashTable[h] = l; }
else /* found in table, so just add line number */
{ RefList t = l->lines;
while (t->next != NULL) t = t->next;
t->next = (RefList) malloc(sizeof(struct RefListRec));
t->next->lineno = lineno;
t->next->next = NULL;
}
}

/* return value (address) of symbol if found or -1 if not found */
int lookup ( char * name )
{ int h = hash(name);
Node l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->st_name) != 0))
l = l->next;
if (l == NULL) return -1;
else return l->st_value;
}

/* return type value of symbol or -1 if symbol not found */
int lookupType( char * name )
{
int h = hash(name);
Node l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->st_name) != 0))
l = l->next;
if (l == NULL) return -1;
else return l->st_type;
}

/* set datatype of symbol returns 0 if symbol not found */
int setType( char * name, int t )
{
int h = hash(name);
Node l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->st_name) != 0))
l = l->next;
if (l == NULL) return -1;
else {
l->st_type = t;
return 0;
}
}

/* print to stdout by default */
void symtab_dump(FILE * of) {
int i;
fprintf(of,"------------ ------ ------------\n");
fprintf(of,"Name Type Line Numbers\n");
fprintf(of,"------------ ------ -------------\n");
for (i=0; i < SIZE; ++i)
{ if (hashTable[i] != NULL)
{ Node l = hashTable[i];
while (l != NULL)
{ RefList t = l->lines;
fprintf(of,"%-12s ",l->st_name);

if (l->st_type == INT_TYPE)
fprintf(of,"%-7s","int ");
if (l->st_type == REAL_TYPE)
fprintf(of,"%-7s","real");
if (l->st_type == STR_TYPE)
fprintf(of,"%-7s","string");


while (t != NULL)
{ fprintf(of,"%4d ",t->lineno);
t = t->next;
}
fprintf(of,"\n");
l = l->next;
}
}
}
}

最佳答案

由于竞争的错误产生而发生冲突。您的 stmt 会产生 error。但是stmt也派生了expr,而expr派生了error。将其放入 expr 中。

查看 yacc 生成的 y.output 文件(因为您传递了 -v)。

您的语法不可能与输入程序匹配,因为您的起始符号派生出大括号或括号括起来的语句 block ,而不是外部函数定义的序列。您需要添加将处理函数定义的语法片段:decltype IDENT arglist body,等等。

一件小事立即脱颖而出:

comment     \/\*([^*]|\n)*\*\/

您是说评论不能包含星号。

匹配真实 C 注释的正则表达式为:[/][*]([^*]|[*]*[^*/])*[*]+[/] (但请注意,这没有考虑 Lex 的换行约定)。

您不需要为单字符标记提供标记名称。这只会增加语法的冗长,你必须写 LBRACE 之类的东西,而不仅仅是“{”。

关于bison - Lexx 和 Yacc 将 C 语法解析为符号表,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/10640290/

30 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com