gpt4 book ai didi

c - Lex/Yacc : "line 1: syntax error, unexpected $end, expecting FUNCTION" lex compile error

转载 作者:太空宇宙 更新时间:2023-11-04 08:08:03 27 4
gpt4 key购买 nike

我正尝试用 lex 和 yacc 为一门名为 CSimple 的新语言构建编译器。(这是该语言的手册:http://www.cs.ucsb.edu/~chris/teaching/cs160/projects/language.html)

对于给定的输入,我需要以前序(pre-order)遍历的方式打印解析得到的语法树,例如:

x=x+4;

还有-

procedure foo(i,j :integer) return integer { return 0;}

等等……

对于这些输入,程序只打印出词素(lexemes),并且总是报出下面的语法错误:

50, x20, =50, x30, +56, 439, ; //lexemas for the first input 
4, procedure7650, foo35, (50, i42, ,50, j61, :7, integer36, )7615, return767, integer7640, {7615, return7656, 039, ;41, } //lexemas for the second input
line 1: syntax error, unexpected $end, expecting FUNCTION //The error

这是我的 lex 文件:

%{
#include <stdlib.h>
#include <stdio.h>

#define YYDEBUG 0
void yyerror(const char *);
extern char* yytext;
char* func (char* token, int index);
%}

%%
boolean {func("BOOLEAN", 1);}
true {func("TRUE", 2);}
false {func("FALSE", 3);}
procedure {func("FUNCTION", 4);}
float {func("FLOAT", 5);}
char {func("CHAR", 6);}
integer {func("INT", 7);}
string {func("STRING", 8);}
intptr {func("INTPTR", 9);}
charptr {func("CHARPTR", 10);}
if {func("COND", 11);}
else {func("BLOCK", 12);}
while {func("WHILE_COND", 13);}
var {func("VARIABLE", 14);}
return {func("RETURN", 15);}
null {func("NL", 16);}

\&\& {func("AND", 17);}
\/ {func("DIVISION_OP", 18);}
\/\%.*\%\/ {func("COMMENT", 19);}
\= {func("ASSIGN", 20);}
\=\= {func("EQUAL", 21);}
\> {func("BIGGER_THEN", 22);}
\>\= {func("BIGGER_OR_EQUAL", 23);}
\< {func("SMALLER_THEN", 24);}
\<\= {func("SMALLER_OR_EQUAL", 25);}
\- {func("MINUS", 26);}
\! {func("LOGICAL_NOT", 27);}
\!\= {func("NOT_EQUAL", 28);}
\|\| {func("OR", 29);}
\+ {func("PLUS", 30);}
\* {func("MUL", 31);}
\& {func("ADDRESS_OF", 32);}
\^ {func("DEREFERANCE", 33);}
\^\^ {func("SYNTAX_ERROR", 34);}
\( {func("L_BRACKET", 35);}
\) {func("R_BRACKET", 36);}
\[ {func("L_STRING_INDEX", 37);}
\] {func("R_STRING_INDEX", 38);}
\; {func("EOS", 39);}
\{ {func("OB", 40);}
\} {func("CB", 41);}
\, {func("COMMA", 42);}
\: {func("VAR_DEC", 43);}
\_ {func("UNDERSCORE", 44);}
\|[\-]*[0-9]+\| {func("ABSULUTE_VALUE_OF_INT", 45);}
\|[a-zA-Z0-9]+\| {func("DECLARED_LENGTH_OF_STRING", 46);}
\&[0-9]* {func("LINKER_ERROR", 47);}
\&[a-zA-Z]+[\+|\-|\*|\/][a-zA-Z]+ {func("LINKER_ERROR", 48);}
\&[^STRING_TYPE\[0-9]+\]] {func("LINKER_ERROR", 49);}

[a-zA-Z]+[_]*[a-zA-Z0-9]* {func("IDENTIFIER", 50);}
[\"][a-zA-Z0-9]+[\"] {func("STRING_TYPE", 51);}
[\'].[\'] {func("CHAR_TYPE", 52);}
[\']..+[\'] {func("SYNTAX_ERROR", 53);}
[0-9]+[\.][0-9]+ {func("FLOAT_CONST", 54);}
[\-][0-9]+[\.][0-9]+ {func("FLOAT_CONST", 55);}
0|[1-9]+[0-9]* {func("INTEGER_CONST", 56);}
[\-][1-9]+[0-9]* {func("INTEGER_CONST", 57);}
0[x|X][0-9]+[a-fA-F0-9]*[a-fA-F0-9]* {func("HEX_NUMBER", 58);}
[0][^xX][1-7]+[0-7]* {func("OCTAL_NUMBER", 59);}
[0|1]+[b] {func("BINARY_NUMBER", 60);}
[^IDENTIFIER][\:] {func("SYNTAX_ERROR", 61);}
[IDENTIFIER\,]*[IDENTIFIER\:] {func("PARAMETER_LIST", 62);}

\([.*]\)\[[.*]\] {func("SYNTAX_ERROR", 63);}
[^[[IDENTIFIER|string\[integer\]|[\^][a-zA-Z]]+]][\=] {func("TYPE_MISMATCH_ERROR", 64);}
[a-zA-Z]+[=][a-zA-Z]+[=] {func("SYNTAX_ERROR", 65);}
\"\m\a\i\n\(\)\" {func("CASE_SENSETIVE_ERROR", 66);}
\([^[\)]] {func("SYNTAX_ERROR", 67);}
\{[^[\}]] {func("SYNTAX_ERROR", 68);}
if|while[^\(] {func("SYNTAX_ERROR", 69);}
else[^\{] {func("SYNTAX_ERROR", 70);}

procedure[^[[IDENTIFIER][\(][PARAMETER_LIST]*[\)]return[boolean|char|integer|intptr|charptr][\{]]] {func("FUNC_DECL_ERROR", 71);}
[PARAMETER_LIST][boolean|char|integer|intptr|charptr][\;] {func("DECL_LIST", 72);}
[IDENTIFIER\:][boolean|char|integer|intptr|charptr|string[INTEGER_CONST]][\;] {func("DECL", 73);}
var[^[DECL_LIST|DECL]] {func("DECL_ERROR", 74);}
return[^[[true|false|CHAR_TYPE|INTEGER_CONST][\;]]] {func("RETURN_ERROR", 75);}
[ ]+ {printf("76");}
--[^ \n\;\:\[\]\{\}\(\)\,]+ {func("SYNTAX_ERROR", 77);}
%%
/*
 * Trace helper used by the scanner actions.
 *
 * Writes "<index>, <matched text>" to stdout (no separator or newline is
 * appended after the matched text) and then returns the token name it was
 * given.
 *
 * token: name of the token the matching rule stands for; returned unchanged.
 * index: number identifying the rule that matched.
 *
 * Note: the rule actions in this file call func() as a bare statement, so
 * the returned name is discarded there; this helper by itself does not hand
 * a token to the parser.
 */
char* func (char* token, int index)
{
    fprintf(stdout, "%d, %s", index, yytext);
    return token;
}

/*
 * End-of-input hook for the scanner.
 *
 * Always returns 1. In lex/flex a nonzero result from yywrap() means there
 * is no further input to switch to, so scanning stops at the end of the
 * current input.
 */
int yywrap(void)
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}

这是 yacc 文件:

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int yylex(void);
void yyerror(const char *);

/*
 * Binary parse-tree node. A pointer to this struct is the semantic value
 * type of the grammar (YYSTYPE is defined as struct node * right below).
 */
typedef struct node{
/* Label printed for this node by printtree(). */
char *token;
/* Line number associated with the node; printtree() compares it with
   yacc_line_number and uses it as the count of '_' characters to print. */
int line_number;
/* Left child, or a null pointer when absent. */
struct node *left;
/* Right child, or a null pointer when absent. */
struct node *right;
} node;

#define YYSTYPE struct node *

node *mknode(node *left, node *right, char *token);
void printtree(node *tree);
int yacc_line_number = 1;

%}

%error-verbose

%start program

%token BOOLEAN TRUE FALSE FUNCTION FLOAT CHAR INT STRING INTPTR CHARPTR DECL
%token L_BRACKET COND WHILE_COND BLOCK VARIABLE RETURN OB CB EOS FUNC_DECL_ERROR
%token AND DIVISION_OP COMMENT ASSIGN EQUAL BIGGER_THEN BIGGER_OR_EQUAL DECL_ERROR
%token SMALLER_THEN SMALLER_OR_EQUAL MINUS LOGICAL_NOT NOT_EQUAL OR PLUS DECL_LIST
%token MUL ADDRESS_OF SEMANTIC_ERROR DEREFERANCE SYNTAX_ERROR PARAMETER_LIST
%token R_BRACKET L_STRING_INDEX R_STRING_INDEX COMMA VAR_DEC NL TYPE_MISMATCH_ERROR
%token UNDERSCORE ABSULUTE_VALUE_OF_INT DECLARED_LENGTH_OF_STRING IDENTIFIER
%token STRING_TYPE CHAR_TYPE FLOAT_CONST INTEGER_CONST HEX_NUMBER OCTAL_NUMBER
%token BINARY_NUMBER CASE_SENSETIVE_ERROR LINKER_ERROR IFX RETURN_ERROR
%left EOS
//%right ASSIGN

%nonassoc IFX
%nonassoc BLOCK

%%
program:method_declarations {printtree($1);};
method_declarations:method_declaration {$$=$1;}
|method_declarations method_declaration {$$ = mknode($1,$2,"");};

method_declaration:FUNCTION IDENTIFIER L_BRACKET R_BRACKET RETURN type OB statement_block CB {$1->left=$2; $1->right=$8; $$=$1;}
|FUNCTION IDENTIFIER L_BRACKET PARAMETER_LIST type R_BRACKET RETURN type OB statement_block CB {$1->left=$2; $1->right=$10; $$=$1;};
type: BOOLEAN {$$=$1;} | CHAR {$$=$1;} | CHARPTR {$$=$1;} | INTPTR {$$=$1;} | INT {$$=$1;};

statement_block: /* none */ {$$ = 0;} | statement_block statement {$$ = mknode($1,$2,"");};
statement: simple_statement EOS {$$=$1;} | compound_statement {$$=$1;} | OB statement_block CB {$$=$2;};

simple_statement: declarative_statement {$$=$1;}| assignment_statement {$$=$1;};
declarative_statement: VARIABLE IDENTIFIER dec_statement {$1->left = $2; $1->right=$3; $$=$1;};
dec_statement: VAR_DEC type EOS {$$=$2;};
assignment_statement: IDENTIFIER {$$=$1;} | IDENTIFIER ASSIGN expression { $2->left=$1; $2->right=$3; $$=$2;};

expression: or_expression {$$=$1;};
or_expression: and_expression {$$=$1;} | or_expression OR and_expression {$2->left=$1; $2->right=$3; $$=$2;};
and_expression: relop_expression {$$=$1;} | and_expression AND relop_expression {$2->left=$1; $2->right=$3; $$=$2;};
relop_expression: ltgt_expression {$$=$1;} | relop_expression NOT_EQUAL ltgt_expression {$2->left=$1;$2->right=$3; $$=$2;} | relop_expression EQUAL ltgt_expression {$2->left=$1;$2->right=$3; $$=$2;};
ltgt_expression: addop_expression {$$=$1;} | ltgt_expression BIGGER_THEN addop_expression {$2->left=$1; $2->right=$3; $$=$2;} | ltgt_expression SMALLER_THEN addop_expression {$2->left=$1; $2->right=$3; $$=$2;} | ltgt_expression BIGGER_OR_EQUAL addop_expression {$2->left=$1; $2->right=$3; $$=$2;} | ltgt_expression SMALLER_OR_EQUAL addop_expression {$2->left=$1; $2->right=$3; $$=$2;};
addop_expression: mulop_expression {$$=$1;} | addop_expression PLUS mulop_expression {$2->left=$1; $2->right=$3; $$=$2;} | addop_expression MINUS mulop_expression {$2->left=$1; $2->right=$3; $$=$2;};

mulop_expression: term {$$=$1;} | mulop_expression MUL term {$2->left=$1; $2->right=$3; $$=$2;} | mulop_expression DIVISION_OP term {$2->left=$1; $2->right=$3; $$=$2;};

term: LOGICAL_NOT value {$1->left=$2; $$=$1;} | PLUS value {$1->left=$2; $$=$1;} | MINUS value {$1->left=$2; $$=$1;} | value {$$=$1;};
value: IDENTIFIER {$$=$1;} | STRING_TYPE {$$=$1;} | CHAR_TYPE {$$=$1;} | FLOAT_CONST {$$=$1;} | HEX_NUMBER {$$=$1;} | INTEGER_CONST {$$=$1;} | OCTAL_NUMBER {$$=$1;} | BINARY_NUMBER {$$=$1;} | TRUE {$$=$1;} | FALSE {$$=$1;} | L_BRACKET expression R_BRACKET {$$=$2;};

compound_statement: if_statement {$$=$1;} | l_statement {$$=$1;};
if_statement: COND L_BRACKET expression R_BRACKET statement %prec IFX { $1->left=$3; $1->right=$5; $$=$1;}
| COND L_BRACKET expression R_BRACKET statement BLOCK statement {$1->left=$3; $1->right = mknode($5,$6,""); $6->left=$7; $$=$1;};
l_statement: while_statement {$$=$1;};
while_statement: WHILE_COND L_BRACKET expression R_BRACKET statement {$1->left=$3; $1->right=$5; $$=$1;};

%%
#include "lex.yy.c"

/*
 * Program entry point: runs the generated parser once.
 *
 * Returns EXIT_SUCCESS when yyparse() reports success (0) and EXIT_FAILURE
 * otherwise, so a syntax error is visible in the process exit status instead
 * of being silently dropped.
 */
int main (void)
{
    return yyparse() == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}

/*
 * Allocate a new tree node.
 *
 * left, right: children to attach (either may be a null pointer).
 * token:       label for the node; the node stores its own copy, so the
 *              caller keeps ownership of the string passed in. A null
 *              pointer is treated as the empty label "".
 *
 * Returns the freshly allocated node with line_number set to 0. The node and
 * its label are heap-allocated and owned by the caller. If memory cannot be
 * obtained, a message is written to stderr and the program exits with
 * EXIT_FAILURE, so the function never returns a null pointer.
 */
node *mknode(node *left, node *right, char *token)
{
    const char *label = (token != NULL) ? token : "";
    /* Size the copy from the string length; sizeof(token) would only be the
       size of a pointer and overflows the buffer for longer labels. */
    size_t label_size = strlen(label) + 1;
    node *newnode = malloc(sizeof *newnode);
    char *newstr = malloc(label_size);

    if (newnode == NULL || newstr == NULL) {
        fprintf(stderr, "mknode: out of memory\n");
        exit(EXIT_FAILURE);
    }

    memcpy(newstr, label, label_size);
    newnode->token = newstr;
    /* printtree() reads line_number, so it must not be left indeterminate. */
    newnode->line_number = 0;
    newnode->left = left;
    newnode->right = right;
    return newnode;
}

/*
 * Print the tree rooted at `tree` to stdout in pre-order: the node's own
 * label first, then its left subtree, then its right subtree.
 *
 * - A null `tree` prints nothing.
 * - When the node's line_number exceeds the global yacc_line_number, a
 *   "\nLine(N)" header is printed and yacc_line_number is advanced to it.
 * - A node with at least one child additionally gets, the first time such a
 *   node with line_number 1 is seen, a "\nLine(1)" header; and, whenever its
 *   line_number is positive, a newline, line_number '_' characters and "(".
 *   Its output is closed with ")" after both subtrees.
 * - The label itself is always printed as " <token> ".
 */
void printtree(node *tree)
{
    /* Remembers across calls whether the "Line(1)" header was emitted. */
    static int first_line_announced = 0;
    int line;
    int has_children;
    int remaining;

    if (tree == NULL) {
        return;
    }

    line = tree->line_number;
    has_children = (tree->left != NULL) || (tree->right != NULL);

    if (line > yacc_line_number) {
        printf("\nLine(%d)", line);
        yacc_line_number = line;
    }

    if (has_children) {
        if (line == 1 && !first_line_announced) {
            printf("\nLine(%d)", line);
            first_line_announced = 1;
        }
        if (line > 0) {
            putchar('\n');
            for (remaining = line; remaining > 0; remaining--) {
                putchar('_');
            }
            putchar('(');
        }
    }

    printf(" %s ", tree->token);

    /* A null child returns immediately, so no guard is needed here. */
    printtree(tree->left);
    printtree(tree->right);

    if (has_children) {
        putchar(')');
    }
}

extern int yylineno;

/*
 * Error reporter for the parser: writes "line <yylineno>: <message>" followed
 * by a newline to stderr.
 *
 * s: the diagnostic text to report.
 *
 * NOTE(review): the scanner spec shown in this file has no rule or option
 * that visibly updates yylineno - confirm the reported line number is
 * actually maintained.
 */
void yyerror(const char *s)
{
    FILE *const sink = stderr;
    const int line = yylineno;

    fprintf(sink, "line %d: %s\n", line, s);
}

需要帮助 :) 谢谢。

最佳答案

你的词法分析器能够识别标记并把它们打印出来,但从不把它们返回给解析器。因此它会读完整个输入、打印所有标记,最后才向解析器返回 $end(EOF,值为 0)标记。解析器一看到这个标记就报语法错误,因为它要求输入中至少包含一个 method_declaration。

您需要让词法分析器在识别出一个标记后立即把它返回,而不是继续读取后面的标记。解析器会反复调用词法分析器,每次调用都期望得到下一个标记。此外,您的解析器还期望词法分析器在 yylval 中设置一个 node * 值。所以您需要像这样的 lex 规则:

boolean { yylval = mknode(0, 0, "BOOLEAN"); return BOOLEAN; }
true { yylval = mknode(0, 0, "TRUE"); return TRUE; }
false { yylval = mknode(0, 0, "FALSE"); return FALSE; }
:

等等。

关于c - Lex/Yacc : "line 1: syntax error, unexpected $end, expecting FUNCTION" lex compile error,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/41292555/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com