gpt4 book ai didi

java - Java 中的词法分析器

转载 作者:搜寻专家 更新时间:2023-10-31 19:37:01 25 4
gpt4 key购买 nike

我一直在尝试用 java 编写一个简单的词法分析器。

文件 Token.java 如下所示:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public enum Token {

TK_MINUS ("-"),
TK_PLUS ("\\+"),
TK_MUL ("\\*"),
TK_DIV ("/"),
TK_NOT ("~"),
TK_AND ("&"),
TK_OR ("\\|"),
TK_LESS ("<"),
TK_LEG ("<="),
TK_GT (">"),
TK_GEQ (">="),
TK_EQ ("=="),
TK_ASSIGN ("="),
TK_OPEN ("\\("),
TK_CLOSE ("\\)"),
TK_SEMI (";"),
TK_COMMA (","),
TK_KEY_DEFINE ("define"),
TK_KEY_AS ("as"),
TK_KEY_IS ("is"),
TK_KEY_IF ("if"),
TK_KEY_THEN ("then"),
TK_KEY_ELSE ("else"),
TK_KEY_ENDIF ("endif"),
OPEN_BRACKET ("\\{"),
CLOSE_BRACKET ("\\}"),
DIFFERENT ("<>"),

STRING ("\"[^\"]+\""),
INTEGER ("\\d"),
IDENTIFIER ("\\w+");

private final Pattern pattern;

Token(String regex) {
pattern = Pattern.compile("^" + regex);
}

int endOfMatch(String s) {
Matcher m = pattern.matcher(s);

if (m.find()) {
return m.end();
}
return -1;
}
}

词法分析器如下:Lexer.java

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Stream;

public class Lexer {
private StringBuilder input = new StringBuilder();
private Token token;
private String lexema;
private boolean exausthed = false;
private String errorMessage = "";
private Set<Character> blankChars = new HashSet<Character>();

public Lexer(String filePath) {
try (Stream<String> st = Files.lines(Paths.get(filePath))) {
st.forEach(input::append);
} catch (IOException ex) {
exausthed = true;
errorMessage = "Could not read file: " + filePath;
return;
}

blankChars.add('\r');
blankChars.add('\n');
blankChars.add((char) 8);
blankChars.add((char) 9);
blankChars.add((char) 11);
blankChars.add((char) 12);
blankChars.add((char) 32);

moveAhead();
}

public void moveAhead() {
if (exausthed) {
return;
}

if (input.length() == 0) {
exausthed = true;
return;
}

ignoreWhiteSpaces();

if (findNextToken()) {
return;
}

exausthed = true;

if (input.length() > 0) {
errorMessage = "Unexpected symbol: '" + input.charAt(0) + "'";
}
}

private void ignoreWhiteSpaces() {
int charsToDelete = 0;

while (blankChars.contains(input.charAt(charsToDelete))) {
charsToDelete++;
}

if (charsToDelete > 0) {
input.delete(0, charsToDelete);
}
}

private boolean findNextToken() {
for (Token t : Token.values()) {
int end = t.endOfMatch(input.toString());

if (end != -1) {
token = t;
lexema = input.substring(0, end);
input.delete(0, end);
return true;
}
}

return false;
}

public Token currentToken() {
return token;
}

public String currentLexema() {
return lexema;
}

public boolean isSuccessful() {
return errorMessage.isEmpty();
}

public String errorMessage() {
return errorMessage;
}

public boolean isExausthed() {
return exausthed;
}
}

并且可以使用 Try.java 进行测试,如下所示:

public class Try {

public static void main(String[] args) {

Lexer lexer = new Lexer("C:/Users/Input.txt");

System.out.println("Lexical Analysis");
System.out.println("-----------------");
while (!lexer.isExausthed()) {
System.out.printf("%-18s : %s \n",lexer.currentLexema() , lexer.currentToken());
lexer.moveAhead();
}

if (lexer.isSuccessful()) {
System.out.println("Ok! :D");
} else {
System.out.println(lexer.errorMessage());
}
}
}

假设 Input.txt

define mine 
a=1000;
b=23.5;

我期望的输出是

define : TK_KEYWORD
mine : IDENTIFIER
a : IDENTIFIER
= : TK_ASSIGN
1000 : INTEGER
; : TK_SEMI
b : IDENTIFIER
= : TK_ASSIGN
23.5 : REAL

但我面临的问题是:它像对待每个数字一样对待

1 INTEGER
0 INTEGER
0 INTEGER
0 INTEGER

它也不识别实数。我得到:

Unexpected symbol: '.'

要获得预期结果需要进行哪些更改?

最佳答案

您要匹配整数的模式是:

INTEGER ("\\d"), 

正好匹配一个数字。

如果你想要不止一个,那就去吧

INTEGER ("\\d+"), 

例如。

而且,为了完成,缺少的其他 float 模式可能看起来像

REAL ("(\\d+)\\.\\d+")

正如评论所指出的那样。或者

REAL ("(\\d*)\\.\\d+")

允许

.23

也是 - 如果这正是您要找的!

关于java - Java 中的词法分析器,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/43067869/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com