gpt4 book ai didi

char* 指针在函数中不会递增

转载 作者:行者123 更新时间:2023-11-30 15:01:23 25 4
gpt4 key购买 nike

我知道指针和指针算术是如何工作的,但这让我感到困惑。

我不久前写了一个词法分析器,现在正在修改它,以便配合解析器使用,但由于某种原因,传给词法分析器函数的 char* 在函数返回后并没有向前推进。

我用 malloc() 分配了一个 char 指针,然后将其传递给对其进行迭代的函数。函数内部确实完成了迭代,但返回后指针又回到了原始地址。我需要 char* 在迭代之后停留在新的地址上,否则我无法完成正确的解析器...

main.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lex.h"

/*
 * Driver for the lexer: loads the file named by argv[1] into a NUL-terminated
 * heap buffer, then repeatedly asks the user whether to lex and print another
 * token.
 *
 * Returns EXIT_SUCCESS after a normal session and EXIT_FAILURE when the
 * argument is missing or the file cannot be opened, measured, or read.
 *
 * NOTE: lexer_get_single_token() receives srcBuffer by value, so every call
 * starts scanning at the beginning of the buffer again. Advancing between
 * calls requires changing that function's interface (return the cursor or
 * take a char **), which is outside the scope of this function.
 */
int main(int argc, char **argv)
{
    struct lexer *token_queue = NULL;
    FILE *srcfile = NULL;
    char *srcBuffer = NULL;
    char decision[20] = "";
    long file_len = 0;
    size_t bytes_read = 0;
    int status = EXIT_FAILURE;

    /* argv[1] is only valid when an argument was actually supplied. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <source-file>\n", argc > 0 ? argv[0] : "lexer");
        goto hell;
    }

    /* The file is only read, so do not demand write permission ("r+"). */
    srcfile = fopen(argv[1], "r");
    if (!srcfile) {
        fprintf(stderr, "file \"%s\" not found, is null\n", argv[1]);
        goto hell;
    }

    /* Seek to the end to learn the size, then rewind for reading. */
    if (fseek(srcfile, 0, SEEK_END) != 0 || (file_len = ftell(srcfile)) < 0) {
        fprintf(stderr, "could not determine size of \"%s\"\n", argv[1]);
        goto hell;
    }
    rewind(srcfile);

    /* One extra byte for the terminator the lexer relies on. */
    srcBuffer = malloc((size_t)file_len + 1);
    if (!srcBuffer) {
        fprintf(stderr, "out of memory reading \"%s\"\n", argv[1]);
        goto hell;
    }

    /*
     * Read at most file_len bytes. In text mode fewer bytes may arrive than
     * ftell() reported, so terminate at the count actually read.
     */
    bytes_read = fread(srcBuffer, 1, (size_t)file_len, srcfile);
    if (ferror(srcfile)) {
        fprintf(stderr, "error while reading \"%s\"\n", argv[1]);
        goto hell;
    }
    srcBuffer[bytes_read] = '\0';

    do {
        printf("print a token?\n");
        /* On EOF or input failure, lex this one time and then leave the loop. */
        if (scanf("%19s", decision) != 1)
            decision[0] = '\0';
        lexer_get_single_token(srcBuffer, &token_queue); // tokenize baby!

        print_tokens_colored(&token_queue);
    } while (decision[0] == 'y'); // slowly simulate parser asking for another token!

    status = EXIT_SUCCESS;

hell:
    free(srcBuffer);
    srcBuffer = NULL;
    if (srcfile) {
        fclose(srcfile);
        srcfile = NULL;
    }
    lexer_destroy(&token_queue);
    return status;
}

词法分析器函数

void lexer_get_single_token(char *iter, struct lexer **Q)
{
char wording[512] = ""; // buffer to hold identifiers, keywords, and strings
unsigned int i = 0;

const char *keywords[] = {
"auto", "const", "double", "float", "int", "short", "struct", "unsigned",
"break", "continue", "else", "for", "long", "signed", "switch", "void",
"case", "default", "enum", "goto", "register", "sizeof", "typedef", "volatile",
"char", "do", "extern", "if", "return", "static", "union", "while",
"inline", "alignof", "_Generic", "bool", "_Bool", "true", "false"
};
printf("*iter == %c\n", *iter);
while ( *iter != '\0' ) {
while ( is_space(*iter) )
++iter;

if (*iter == '/' && iter[1] == '*') { // found C style /**/ comment
do {
++iter;
}
while ( !(*iter == '*' && iter[1] == '/') ); // continuously skip until we find a */
iter += 2; // skip twice to pass over */ and go to the next token.
}

if (*iter == '/' && iter[1] == '/') { // found C++ style // comment
while ( *iter != '\n' )
++iter; // skip until the next line which will be skipped itself.
}

if (*iter == '\\' && iter[1] == '\n') { // formatting Left slash check
lexer_add_token(Q, LeftSlash, "\\", 2);
iter += 2;
return;
}

if (*iter == '\"') { // found string literal, adjust for "\\" so we won't crash
wording[i++] = *iter++; // add the first double quote to buffer
while ( *iter != '\"' ) {
if (*iter == '\\' && iter[1] == '\"' && iter[-1] != '\\') {
wording[i++] = *iter++; // add the literal double quote as well
}
wording[i++] = *iter++;
}
wording[i++] = *iter++; // found the ending double quote, add that too.

if (wording[0] != '\0') {
lexer_add_token(Q, StringConstant, wording, i+1);
reset_string(wording);
return;
}
}

if ( *iter == '\'' ) { // found character literal, adjust for '\\' so we won't crash
wording[i++] = *iter++;
int counter=0;
while (*iter != '\'' && counter < 2) { // Same operation as the string literal but limit as char
if (*iter == '\\' && iter[1] == '\'' && iter[-1] != '\\') {
wording[i++] = *iter++;
}
wording[i++] = *iter++;
++counter;
}
wording[i++] = *iter++; // add ending single quote to buffer

if (wording[0] != '\0') {
lexer_add_token(Q, CharConstant, wording, i+1);
reset_string(wording);
return;
}
}

if (*iter == '0' && (iter[1] == 'x' || iter[1] == 'X')) { // found hexadecimal constant
wording[i++] = *iter++; // copy both 0 and x to buffer
wording[i++] = *iter++;

while ( is_numeral(*iter) ) {
wording[i++] = *iter++; // copy numbers and letters A to F
}
if ( *iter == '.' && is_numeral(iter[1]) ) { // found hexadecimal float
wording[i++] = *iter++;
while ( is_numeral(*iter) )
wording[i++] = *iter++;
if (*iter == 'p' && is_numeral(iter[1])) { // stuff like 0x0.3p10.
wording[i++] = *iter++;
while ( is_numeral(*iter) )
wording[i++] = *iter++;
}
if (wording[0] != '\0') {
lexer_add_token(Q, NumConstantHexFloat, wording, i+1);
reset_string(wording);
return;
}
}
else { // we didn't find a decimal, so tokenize what we found as a normal hex constant
if (wording[0] != '\0') {
lexer_add_token(Q, NumConstantHex, wording, i+1);
reset_string(wording);
return;
}
}
}

while ( is_numeric(*iter) ) { // found decimal constant
wording[i++] = *iter++;
}
if ( *iter == '.' && is_numeric(iter[1]) ) { // found floating point number
wording[i++] = *iter++; // add in the decimal char
while ( is_numeric(*iter) )
wording[i++] = *iter++;

// add the 'e' constant for large floats as well as 'p' (power) constant
if ( (*iter == 'p' || *iter == 'P' || *iter == 'e' || *iter == 'E') && is_numeric(iter[1]) )
{
wording[i++] = *iter++;
while ( is_numeric(*iter) )
wording[i++] = *iter++;
}
if (*iter == 'f' || *iter == 'F') // stuff like 2.0f, add that into the buffer!
wording[i++] = *iter++;

if (wording[0] != '\0') {
lexer_add_token(Q, NumConstantReal, wording, i+1);
reset_string(wording);
return;
}
}
else { // no decimal, consider it a natural number
if (wording[0] != '\0') {
lexer_add_token(Q, NumConstant, wording, i+1);
reset_string(wording);
return;
}
}

if (is_alphabetic(*iter)) { // found an identifier or potential keyword
while (is_potential_identifier(*iter))
wording[i++] = *iter++;

if (wording[0] != '\0') {
int x;
int found_keyword = 0;
for ( x=0 ; x<sizeof keywords/sizeof keywords[0] ; ++x ) {
if ( !strcmp(wording, keywords[x]) )
found_keyword = 1;
}
if (found_keyword)
lexer_add_token(Q, Keyword, wording, i+1);
else lexer_add_token(Q, NumIdent, wording, i+1);
reset_string(wording);
return;
}
}

switch ( *iter ) { // add in individual characters
case '=':
if (iter[1] == '=') {
++iter;
lexer_add_token(Q, EqualCmp, "==", 3);
}
else lexer_add_token(Q, Equal, "=", 2);
++iter;
return;
case ';':
lexer_add_token(Q, Semicolon, ";", 2);
++iter;
return;
case ':':
lexer_add_token(Q, Colon, ";", 2);
++iter;
return;
case '+': // possible uses => left unary is positive, twice unary is increment, once binary is addition
if (iter[1] == '=') {
++iter;
lexer_add_token(Q, PlusEqual, "+=", 3);
}
else if (iter[1] == '+') {
++iter;
lexer_add_token(Q, Increment, "++", 3);
}
else lexer_add_token(Q, Plus, "+", 2);
++iter;
return;
case '-': // possible uses => left unary is negating, twice unary is decrement, one binary is minus
if (iter[1] == '=') {
++iter;
lexer_add_token(Q, MinusEqual, "-=", 3);
}
else if (iter[1] == '-') {
++iter;
lexer_add_token(Q, Decrement, "--", 3);
}
else if (iter[1] == '>') {
++iter;
lexer_add_token(Q, Arrow, "->", 3);
}
else lexer_add_token(Q, Dash, "-", 2);
++iter;
return;
case '*': // leftward unary is dereferencing ptr, binary be mult. Also check for / as ending comment
if (iter[1] == '=') {
++iter;
lexer_add_token(Q, MultEqual, "*=", 3);
}
else lexer_add_token(Q, Asterisk, "*", 2);
++iter;
return;
case '/': // check for * and / as comment EDIT: DONE
if (iter[1] == '=') {
++iter;
lexer_add_token(Q, DivEqual, "/=", 3);
}
else lexer_add_token(Q, DivSlash, "/", 2);
++iter;
return;
case '(':
lexer_add_token(Q, LeftParens, "(", 2);
++iter;
return;
case ')':
lexer_add_token(Q, RiteParens, ")", 2);
++iter;
return;
case '[':
lexer_add_token(Q, LeftSqBracket, "[", 2);
++iter;
return;
case ']':
lexer_add_token(Q, RightSqBracket, "]", 2);
++iter;
return;
case '{':
lexer_add_token(Q, LeftCurlBrace, "{", 2);
++iter;
return;
case '}':
lexer_add_token(Q, RightCurlBrace, "}", 2);
++iter;
return;
case '.':
if (iter[1] == '.' && iter[2] == '.') {
iter += 2;
lexer_add_token(Q, Ellipses, "...", 4);
}
else lexer_add_token(Q, Dot, ".", 2);
++iter;
return;
case ',':
lexer_add_token(Q, Comma, ",", 2);
++iter;
return;
case '<':
if (iter[1] == '<') {
if (iter[2] == '=') {
lexer_add_token(Q, LeftBitShiftEqual, "<<=", 4);
iter += 2;
}
else {
lexer_add_token(Q, LeftBitShift, "<<", 3);
++iter;
}
}
else if (iter[1] == '=') {
lexer_add_token(Q, LessEqual, "<=", 3);
++iter;
}
else lexer_add_token(Q, LeftArrow, "<", 2);
++iter;
return;
case '>':
if (iter[1] == '>') {
if (iter[2] == '=') {
lexer_add_token(Q, RightBitShiftEqual, ">>=", 4);
iter += 2;
}
else {
lexer_add_token(Q, RightBitShift, ">>", 3);
++iter;
}
}
else if (iter[1] == '=') {
lexer_add_token(Q, GreaterEqual, ">=", 3);
++iter;
}
else lexer_add_token(Q, RightArrow, ">", 2);
++iter;
return;
case '?':
lexer_add_token(Q, QuestionMark, "?", 2);
++iter;
return;
case '#':
lexer_add_token(Q, HashSym, "#", 2);
++iter;
return;
case '&':
if (iter[1] == '=') {
++iter;
lexer_add_token(Q, AndEqual, "&=", 3);
}
else if (iter[1] == '&') {
++iter;
lexer_add_token(Q, BoolAnd, "&&", 3);
}
else lexer_add_token(Q, Ampersand, "&", 2);
++iter;
return;
case '^':
if (iter[1] == '=') {
++iter;
lexer_add_token(Q, XorEqual, "^=", 3);
}
else lexer_add_token(Q, Carot, "^", 2);
++iter;
return;
case '%':
if (iter[1] == '=') {
++iter;
lexer_add_token(Q, ModuloEqual, "%=", 3);
}
else lexer_add_token(Q, Percent, "%", 2);
++iter;
return;
case '!':
if (iter[1] == '=') {
++iter;
lexer_add_token(Q, NotEqual, "!=", 3);
}
else lexer_add_token(Q, ExclamationMark, "!", 2);
++iter;
return;
case '|':
if (iter[1] == '=') {
++iter;
lexer_add_token(Q, OrEqual, "|=", 3);
}
else if (iter[1] == '|') {
++iter;
lexer_add_token(Q, BoolOr, "||", 3);
}
else lexer_add_token(Q, VerticalBar, "|", 2);
++iter;
return;
case '~':
lexer_add_token(Q, Tilde, "~", 2);
++iter;
return;
case '@':
lexer_add_token(Q, AtSign, "@", 2);
++iter;
return;
case '$':
lexer_add_token(Q, DollarSign, "$", 2);
++iter;
return;
case '`':
lexer_add_token(Q, GraveAccent, "`", 2);
++iter;
return;
}
++iter;
}

}

最佳答案

C 函数按值传递参数,因此在函数 lexer_get_single_token() 内部,iter 是指针 srcBuffer 的副本。这意味着对 iter 值的更改不会反映到 srcBuffer 上。如果您想保留函数内部对 iter 所做的更改,可以让函数把该指针返回给调用者,或者再增加一层间接寻址。

通过将函数签名更改为:

char * lexer_get_single_token(char *iter, struct lexer **Q);

该函数返回一个指向char的指针,当函数完成时可以返回iter。函数调用将如下所示:

srcBuffer = lexer_get_single_token(srcBuffer, &token_queue);

函数调用后,srcBuffer 指向函数结束时由 iter 指示的位置。您应该保存原始 srcBuffer 指针的副本,以便稍后释放它。

或者,您可以将函数签名更改为:

void lexer_get_single_token(char **iter, struct lexer **Q);

现在函数调用将如下所示:

lexer_get_single_token(&srcBuffer, &token_queue);

需要修改函数中的代码以适应这层额外的间接寻址(用 *iter 代替原来的 iter,用 **iter 代替原来的 *iter),但由于传入的是指向 srcBuffer 的指针,函数就能够修改 srcBuffer 本身。与前一种做法一样,应另外保存 malloc() 返回的原始指针,以便稍后传给 free()。

关于char* 指针在函数中不会递增,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/41455390/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com