gpt4 book ai didi

c++ - 是否有一个 cpp 文件的分词器

转载 作者:搜寻专家 更新时间:2023-10-31 00:22:58 24 4
gpt4 key购买 nike

我有一个包含大量类实现的 cpp 文件。现在我必须修改源文件本身。

为此,是否有一个库/API/工具可以替我对这个文件进行分词(tokenize),并在我每次请求时返回下一个 token?

我的要求如下。

OpenCPPFile()
While (!EOF)
token = GetNextToken();
process something based on this token
EndWhile
I am happy now

问候,阿杰

最佳答案

Boost.Wave 提供了标准的 C++ 词法分析器,以及许多其他工具(例如标准预处理器),它们都构建在 Boost.Spirit 之上。请参阅 boost 目录中的以下示例:

C:\boost\libs\wave\samples\lexed_tokens

例如,如果您有以下名为 main.cpp 的文件:

int main()
{
double PI = 3.14, r = 10;
double area = PI * r*r;
}

然后对它运行我命名为 cpp_lex 的词法分析器(假设二者在同一目录中):

c:\cpp_lex main.cpp

你得到:

INT              (#334) at main.cpp (  1/ 1): >int<
SPACE (#392) at main.cpp ( 1/ 4): > <
IDENTIFIER (#380) at main.cpp ( 1/ 5): >main<
LEFTPAREN (#277) at main.cpp ( 1/ 9): >(<
RIGHTPAREN (#294) at main.cpp ( 1/10): >)<
NEWLINE (#394) at main.cpp ( 1/11): >\n<
LEFTBRACE (#274) at main.cpp ( 2/ 1): >{<
NEWLINE (#394) at main.cpp ( 2/ 2): >\n<
SPACE (#392) at main.cpp ( 3/ 1): >\t<
DOUBLE (#321) at main.cpp ( 3/ 2): >double<
SPACE (#392) at main.cpp ( 3/ 8): > <
IDENTIFIER (#380) at main.cpp ( 3/ 9): >PI<
SPACE (#392) at main.cpp ( 3/11): > <
ASSIGN (#258) at main.cpp ( 3/12): >=<
SPACE (#392) at main.cpp ( 3/13): > <
FLOATLIT (#386) at main.cpp ( 3/14): >3.14<
COMMA (#264) at main.cpp ( 3/18): >,<
SPACE (#392) at main.cpp ( 3/19): > <
IDENTIFIER (#380) at main.cpp ( 3/20): >r<
SPACE (#392) at main.cpp ( 3/21): > <
ASSIGN (#258) at main.cpp ( 3/22): >=<
SPACE (#392) at main.cpp ( 3/23): > <
INTLIT (#384) at main.cpp ( 3/24): >10<
SEMICOLON (#297) at main.cpp ( 3/26): >;<
NEWLINE (#394) at main.cpp ( 3/27): >\n<
SPACE (#392) at main.cpp ( 4/ 1): >\t<
DOUBLE (#321) at main.cpp ( 4/ 2): >double<
SPACE (#392) at main.cpp ( 4/ 8): > <
IDENTIFIER (#380) at main.cpp ( 4/ 9): >area<
SPACE (#392) at main.cpp ( 4/13): > <
ASSIGN (#258) at main.cpp ( 4/14): >=<
SPACE (#392) at main.cpp ( 4/15): > <
IDENTIFIER (#380) at main.cpp ( 4/16): >PI<
SPACE (#392) at main.cpp ( 4/18): > <
STAR (#302) at main.cpp ( 4/19): >*<
SPACE (#392) at main.cpp ( 4/20): > <
IDENTIFIER (#380) at main.cpp ( 4/21): >r<
STAR (#302) at main.cpp ( 4/22): >*<
IDENTIFIER (#380) at main.cpp ( 4/23): >r<
SEMICOLON (#297) at main.cpp ( 4/24): >;<
NEWLINE (#394) at main.cpp ( 4/25): >\n<
RIGHTBRACE (#293) at main.cpp ( 5/ 1): >}<
EOF (#401) at main.cpp ( 5/ 2): ><

这是代码,更多信息请查看Boost.Wave manual :

/*=============================================================================
Boost.Wave: A Standard compliant C++ preprocessor library

http://www.boost.org/

Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
Software License, Version 1.0. (See accompanying file
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/

#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <vector>

///////////////////////////////////////////////////////////////////////////////
// Include Wave itself
#include <boost/wave.hpp>

///////////////////////////////////////////////////////////////////////////////
// Include the lexer stuff
#include <boost/wave/cpplexer/cpp_lex_token.hpp> // token class
#include <boost/wave/cpplexer/cpp_lex_iterator.hpp> // lexer class

///////////////////////////////////////////////////////////////////////////////
//
// Special output operator for a lex_token.
//
// Note: this doesn't compile if BOOST_SPIRIT_DEBUG is defined.
//
///////////////////////////////////////////////////////////////////////////////
template <typename PositionT>
inline std::ostream &
operator<< (std::ostream &stream,
boost::wave::cpplexer::lex_token<PositionT> const &t)
{
using namespace std;
using namespace boost::wave;

token_id id = token_id(t);
stream << setw(16)
<< left << boost::wave::get_token_name(id) << " ("
<< "#" << setw(3) << BASEID_FROM_TOKEN(id);

if (ExtTokenTypeMask & id) {
// this is an extended token id
if (AltTokenType == (id & ExtTokenOnlyMask)) {
stream << ", AltTokenType";
}
else if (TriGraphTokenType == (id & ExtTokenOnlyMask)) {
stream << ", TriGraphTokenType";
}
else if (AltExtTokenType == (id & ExtTokenOnlyMask)){
stream << ", AltExtTokenType";
}
}

stream
<< ") at " << t.get_position().get_file() << " ("
<< setw(3) << right << t.get_position().get_line() << "/"
<< setw(2) << right << t.get_position().get_column()
<< "): >";

typedef typename boost::wave::cpplexer::lex_token<PositionT>::string_type
string_type;

string_type const& value = t.get_value();
for (std::size_t i = 0; i < value.size(); ++i) {
switch (value[i]) {
case '\r': stream << "\\r"; break;
case '\n': stream << "\\n"; break;
case '\t': stream << "\\t"; break;
default:
stream << value[i];
break;
}
}
stream << "<";

return stream;
}

///////////////////////////////////////////////////////////////////////////////
// main entry point
int main(int argc, char *argv[])
{
if (2 != argc) {
std::cerr << "Usage: lexed_tokens infile" << std::endl;
return -1;
}

// current file position is saved for exception handling
boost::wave::util::file_position_type current_position;

try {
// Open and read in the specified input file.
std::ifstream instream(argv[1]);
std::string instr;

if (!instream.is_open()) {
std::cerr << "Could not open input file: " << argv[1] << std::endl;
return -2;
}
instream.unsetf(std::ios::skipws);
instr = std::string(std::istreambuf_iterator<char>(instream.rdbuf()),
std::istreambuf_iterator<char>());

// tokenize the input data into C++ tokens using the C++ lexer
typedef boost::wave::cpplexer::lex_token<> token_type;
typedef boost::wave::cpplexer::lex_iterator<token_type> lexer_type;
typedef token_type::position_type position_type;

position_type pos(argv[1]);
lexer_type it = lexer_type(instr.begin(), instr.end(), pos,
boost::wave::language_support(
boost::wave::support_cpp|boost::wave::support_option_long_long));
lexer_type end = lexer_type();

while (it != end) {
current_position = (*it).get_position(); // for error reporting
std::cout << *it << std::endl; // dump the tokenf info
++it;
}
}
catch (boost::wave::cpplexer::lexing_exception const& e) {
// some lexing error
std::cerr
<< e.file_name() << "(" << e.line_no() << "): "
<< e.description() << std::endl;
return 2;
}
catch (std::exception const& e) {
// use last recognized token to retrieve the error position
std::cerr
<< current_position.get_file()
<< "(" << current_position.get_line() << "): "
<< "exception caught: " << e.what()
<< std::endl;
return 3;
}
catch (...) {
// use last recognized token to retrieve the error position
std::cerr
<< current_position.get_file()
<< "(" << current_position.get_line() << "): "
<< "unexpected exception caught." << std::endl;
return 4;
}
return 0;
}

关于c++ - 是否有一个 cpp 文件的分词器,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/2666310/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com