gpt4 book ai didi

c++ - 如何多次使用解析器?

转载 作者:行者123 更新时间:2023-11-30 05:14:35 24 4
gpt4 key购买 nike

SQLParser.h:

class SQLParser{
/*____Variables____*/
private:
std::string _vendor;
antlr4::CommonTokenStream* _tokenStream;
antlr4::Parser* _parser;
antlr4::Lexer* _lexer;

/*____Functions____*/
public:
SQLParser(const std::string& Vendor);
~SQLParser();
antlr4::CommonTokenStream* get_tokens(const std::string& text);
std::vector<std::string> get_lexems(const std::string& text);
antlr4::ParserRuleContext* parse(const std::string& text);
bool check_syntax(const std::string& text);
void print_string_tree(const std::string& text); // parse and print in LISP format
};

SQLParser.cpp:

...
CommonTokenStream* SQLParser::get_tokens(const std::string& text){
(dynamic_cast<ANTLRInputStream*>(_lexer->getInputStream()))->load(text);
_tokenStream->reset();
_tokenStream->fill();
return _tokenStream;
}

std::vector<std::string> SQLParser::get_lexems(const std::string& text){
get_tokens(text);

std::vector<std::string> lexems;
for(auto token : _tokenStream->getTokens()) {
lexems.push_back(token->getText());
}
return lexems;
}

ParserRuleContext* SQLParser::parse(const std::string& text){
get_tokens(text);

_parser->setInputStream(_tokenStream);
ParserRuleContext* tree;
try{
if(_vendor == "tsql"){
tree = (dynamic_cast<tsqlParser*>(_parser))->root();
}
if(_vendor == "mysql"){
tree = (dynamic_cast<mysqlParser*>(_parser))->root();
}
}
catch(std::_Nested_exception<ParseCancellationException>& e){
return nullptr;
}
return tree;
}

为每个具体的供应商 创建一个对象SQLParser我想使用这个对象来解析多个输入文本。但是我对 TokenStream 的大小有疑问。我预计它的大小会动态变化。例如,main 是这样的:ma​​in.cpp:

#include <iostream>
#include <string>

#include <antlr4-runtime.h>
#include "SQLParser.h"

using namespace antlr4;

int main(){
SQLParser parser("tsql");
std::cout << "'select 1;': ";
parser.print_string_tree("select 1;");
std::cout << "\n\n'select 1,2,3;': ";
parser.print_string_tree("select 1,2;");
std::cout << "\n";
return 0;
}

输出如下:

'select 1;': (root (sql_clauses (sql_clause (dml_clause (select_statement (query_expression (query_specification select (select_list (select_list_elem (expression (constant 1)))))) ;)))) <EOF>)

'select 1,2,3;': (root (sql_clauses (sql_clause (dml_clause (select_statement (query_expression (query_specification select (select_list (select_list_elem (expression (constant 1)))))) ,)))) )

我应该如何使用 TokenStream 来避免这个错误?

最佳答案

我和你有类似的设置。上下文类将 lexer + parser + listeners 等放在一起作为一个整体。要使用新输入重新开始解析,您必须让您的 token 流再次重新加载所有 token 。在我的上下文类中,我这样做:

struct MySQLParserContextImpl : public MySQLParserContext {
ANTLRInputStream input;
MySQLLexer lexer;
CommonTokenStream tokens;
MySQLParser parser;
ContextErrorListener errorListener;

bool caseSensitive;
std::vector<ParserErrorInfo> errors;

...

ParseTree *parse(const std::string &text, MySQLParseUnit unit) {
input.load(text);
return startParsing(false, unit);
}

bool errorCheck(const std::string &text, MySQLParseUnit unit) {
parser.removeParseListeners();
input.load(text);
startParsing(true, unit);
return errors.empty();
}

private:
ParseTree *parseUnit(MySQLParseUnit unit) {
switch (unit) {
case MySQLParseUnit::PuCreateSchema:
return parser.createDatabase();
case MySQLParseUnit::PuCreateTable:
return parser.createTable();
case MySQLParseUnit::PuCreateTrigger:
return parser.createTrigger();
case MySQLParseUnit::PuCreateView:
return parser.createView();
case MySQLParseUnit::PuCreateFunction:
return parser.createFunction();
case MySQLParseUnit::PuCreateProcedure:
return parser.createProcedure();
case MySQLParseUnit::PuCreateUdf:
return parser.createUdf();
case MySQLParseUnit::PuCreateRoutine:
return parser.createRoutine();
case MySQLParseUnit::PuCreateEvent:
return parser.createEvent();
case MySQLParseUnit::PuCreateIndex:
return parser.createIndex();
case MySQLParseUnit::PuGrant:
return parser.grant();
case MySQLParseUnit::PuDataType:
return parser.dataTypeDefinition();
case MySQLParseUnit::PuCreateLogfileGroup:
return parser.createLogfileGroup();
case MySQLParseUnit::PuCreateServer:
return parser.createServer();
case MySQLParseUnit::PuCreateTablespace:
return parser.createTablespace();
default:
return parser.query();
}
}

ParseTree *startParsing(bool fast, MySQLParseUnit unit) {
errors.clear();
lexer.reset();
lexer.setInputStream(&input); // Not just reset(), which only rewinds the current position.
tokens.setTokenSource(&lexer);

parser.reset();
parser.setBuildParseTree(!fast);

// First parse with the bail error strategy to get quick feedback for correct queries.
parser.setErrorHandler(std::make_shared<BailErrorStrategy>());
parser.getInterpreter<ParserATNSimulator>()->setPredictionMode(PredictionMode::SLL);

ParseTree *tree;
try {
tree = parseUnit(unit);
} catch (ParseCancellationException &) {
if (fast)
tree = nullptr;
else {
// If parsing was cancelled we either really have a syntax error or we need to do a second step,
// now with the default strategy and LL parsing.
tokens.reset();
parser.reset();
parser.setErrorHandler(std::make_shared<DefaultErrorStrategy>());
parser.getInterpreter<ParserATNSimulator>()->setPredictionMode(PredictionMode::LL);
tree = parseUnit(unit);
}
}

if (errors.empty() && !lexer.hitEOF) {
// There is more input than needed for the given parse unit. Make this a fail as we don't allow
// extra input after the specific rule.
// This part is only needed if the grammar has no explicit EOF token at the end of the parsed rule.
Token *token = tokens.LT(1);
ParserErrorInfo info = {"extraneous input found, expecting end of input",
token->getType(),
token->getStartIndex(),
token->getLine(),
token->getCharPositionInLine(),
token->getStopIndex() - token->getStartIndex() + 1};
errors.push_back(info);
}
return tree;
}
...

关于c++ - 如何多次使用解析器?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/43386848/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com