gpt4 book ai didi

即使我处理了该字符,也无法在解析它时消除数组中的一个字符

转载 作者:太空宇宙 更新时间:2023-11-03 23:42:18 25 4
gpt4 key购买 nike

所以这是我第二次将我的代码调整为 fscanf 以获得我想要的东西。我在输出旁边添加了一些注释。我遇到的主要问题是一个空字符或空格被添加到数组中。我试图检查字符串变量中的空字符和空格,但它没有捕获到它。我有点卡住了,想知道为什么我的代码让那个空字符通过?

出现错误的部分“Pardon, O King”,输出:King -- 1; -- 1所以在这里它解析了一个单词,然后“通过 strip 函数变成了\0,然后我稍后的检查允许它通过??

输入:一个包含撇号和逗号的短篇小说(狮子岩。首先,狮子醒了)

//Output: Every unique word that shows up with how many times it shows up.
//Lion -- 1
//s - 12
//lion -- 8
//tree -- 2
//-- 1 //this is the line that prints a null char?
//cub -- //3 it is not a space! I even check if it is \0 before entering
//it into the array. Any ideas (this is my 2nd time)?
//trying to rewrite my code around a fscanf function.


#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>

//Remove non-alpha numeric characters
void strip_word(char* string)
{
char* string_two = calloc(80, sizeof(char));
int i;
int c = 0;
for(i = 0; i < strlen(string); i++)
{
if(isalnum(string[i]))
{
string_two[c] = string[i];
++c;
}
}
string_two[i] = '\0';
strcpy(string, string_two);
free(string_two);
}

//Parse through file
void file_parse(FILE* text_file, char*** word_array, int** count_array, int* total_count, int* unique_count)
{
int mem_Size = 8;
int is_unique = 1;

char** words = calloc(mem_Size, sizeof(char *)); //Dynamically allocate array of size 8 of char*
if (words == NULL)
{
fprintf(stderr, "ERROR: calloc() failed!");
}

int* counts = calloc(mem_Size, sizeof(int)); //Dynamically allocate array of size 8 of int
if (counts == NULL)
{
fprintf(stderr, "ERROR: calloc() failed!");
}

printf("Allocated initial parallel arrays of size 8.\n");
fflush(stdout);

char* string;

while('A')
{
is_unique = 1;

fscanf(text_file, " ,");
fscanf(text_file, " '");

while(fscanf(text_file, "%m[^,' \n]", &string) == 1) //%m length modifier
{
is_unique = 1;
strip_word(string);
if(string == '\0') continue; //if the string is empty move to next iteration
else
{
int i = 0;
++(*total_count);
for(i = 0; i < (*unique_count); i++)
{
if(strcmp(string, words[i]) == 0)
{
counts[i]++;
is_unique = 0;
break;
}
}
if(is_unique)
{
++(*unique_count);
if((*unique_count) >= mem_Size)
{
mem_Size = mem_Size*2;
words = realloc(words, mem_Size * sizeof(char *));
counts = realloc(counts, mem_Size * sizeof(int));
if(words == NULL || counts == NULL)
{
fprintf(stderr, "ERROR: realloc() failed!");
}
printf("Re-allocated parallel arrays to be size %d.\n", mem_Size);
fflush(stdout);
}
words[(*unique_count)-1] = calloc(strlen(string) + 1, sizeof(char));
strcpy(words[(*unique_count)-1], string);
counts[(*unique_count) - 1] = 1;
}
}
free(string);
}
if(feof(text_file)) break;
}
printf("All done (successfully read %d words; %d unique words).\n", *total_count, *unique_count);
fflush(stdout);
*word_array = words;
*count_array = counts;

}

int main(int argc, char* argv[])
{
if(argc < 2 || argc > 3) //Checks if too little or too many args
{
fprintf(stderr, "ERROR: Invalid Arguements\n");
return EXIT_FAILURE;
}

FILE * text_file = fopen(argv[1], "r");
if (text_file == NULL)
{
fprintf(stderr, "ERROR: Can't open file");

}

int total_count = 0;
int unique_count = 0;
char** word_array;
int* count_array;

file_parse(text_file, &word_array, &count_array, &total_count, &unique_count);

fclose(text_file);

int i;

if(argv[2] == NULL)
{
printf("All words (and corresponding counts) are:\n");
fflush(stdout);
for(i = 0; i < unique_count; i++)
{
printf("%s -- %d\n", word_array[i], count_array[i]);
fflush(stdout);
}
}

else
{
printf("First %d words (and corresponding counts) are:\n", atoi(argv[2]));
fflush(stdout);
for(i = 0; i < atoi(argv[2]); i++)
{
printf("%s -- %d\n", word_array[i], count_array[i]);
fflush(stdout);
}
}

for(i = 0; i < unique_count; i++)
{
free(word_array[i]);
}
free(word_array);
free(count_array);

return EXIT_SUCCESS;
}

最佳答案

我不太确定您的代码出了什么问题。我正在使用 GCC 6.3.0 在 macOS Sierra 10.12.3 上工作,本地 fscanf() 不支持 m 修饰符。因此,我修改了代码以使用 80 字节的固定大小字符串。当我这样做时(而且只有那样),您的程序运行时没有明显问题(当然是在输入“狮子的岩石。首先,狮子醒来”)。

我还认为 while ('A') 循环(如果完全使用它,应该按照惯例编写 while (1) )是不可取的。我编写了一个函数 read_word() 来获取下一个“单词”,包括跳过空格、逗号和引号,并使用它来控制循环。我在 file_parse() 中保留了您的内存分配不变。我确实摆脱了 strip_word() 中的内存分配(最终——它也像写的那样工作正常)。

这给我留下了:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>

static void strip_word(char *string)
{
char string_two[80];
int i;
int c = 0;
int len = strlen(string);
for (i = 0; i < len; i++)
{
if (isalnum(string[i]))
string_two[c++] = string[i];
}
string_two[c] = '\0';
strcpy(string, string_two);
}

static int read_word(FILE *fp, char *string)
{
if (fscanf(fp, " ,") == EOF ||
fscanf(fp, " '") == EOF ||
fscanf(fp, "%79[^,' \n]", string) != 1)
return EOF;
return 0;
}

static void file_parse(FILE *text_file, char ***word_array, int **count_array, int *total_count, int *unique_count)
{
int mem_Size = 8;

char **words = calloc(mem_Size, sizeof(char *));
if (words == NULL)
{
fprintf(stderr, "ERROR: calloc() failed!");
}

int *counts = calloc(mem_Size, sizeof(int));
if (counts == NULL)
{
fprintf(stderr, "ERROR: calloc() failed!");
}

printf("Allocated initial parallel arrays of size 8.\n");
fflush(stdout);

char string[80];

while (read_word(text_file, string) != EOF)
{
int is_unique = 1;
printf("Got [%s]\n", string);
strip_word(string);
if (string[0] == '\0')
continue;
else
{
int i = 0;
++(*total_count);
for (i = 0; i < (*unique_count); i++)
{
if (strcmp(string, words[i]) == 0)
{
counts[i]++;
is_unique = 0;
break;
}
}
if (is_unique)
{
++(*unique_count);
if ((*unique_count) >= mem_Size)
{
mem_Size = mem_Size * 2;
words = realloc(words, mem_Size * sizeof(char *));
counts = realloc(counts, mem_Size * sizeof(int));
if (words == NULL || counts == NULL)
{
fprintf(stderr, "ERROR: realloc() failed!");
exit(EXIT_FAILURE);
}
printf("Re-allocated parallel arrays to be size %d.\n", mem_Size);
fflush(stdout);
}
words[(*unique_count) - 1] = calloc(strlen(string) + 1, sizeof(char));
strcpy(words[(*unique_count) - 1], string);
counts[(*unique_count) - 1] = 1;
}
}
}
printf("All done (successfully read %d words; %d unique words).\n", *total_count, *unique_count);
fflush(stdout);
*word_array = words;
*count_array = counts;
}

int main(int argc, char *argv[])
{
if (argc < 2 || argc > 3)
{
fprintf(stderr, "ERROR: Invalid Arguements\n");
return EXIT_FAILURE;
}

FILE *text_file = fopen(argv[1], "r");
if (text_file == NULL)
{
fprintf(stderr, "ERROR: Can't open file");
return EXIT_FAILURE;
}

int total_count = 0;
int unique_count = 0;
char **word_array = 0;
int *count_array = 0;

file_parse(text_file, &word_array, &count_array, &total_count, &unique_count);

fclose(text_file);

if (argv[2] == NULL)
{
printf("All words (and corresponding counts) are:\n");
fflush(stdout);
for (int i = 0; i < unique_count; i++)
{
printf("%s -- %d\n", word_array[i], count_array[i]);
fflush(stdout);
}
}
else
{
printf("First %d words (and corresponding counts) are:\n", atoi(argv[2]));
fflush(stdout);
for (int i = 0; i < atoi(argv[2]); i++)
{
printf("%s -- %d\n", word_array[i], count_array[i]);
fflush(stdout);
}
}

for (int i = 0; i < unique_count; i++)
free(word_array[i]);
free(word_array);
free(count_array);

return EXIT_SUCCESS;
}

在数据文件上运行时:

the lion's rock. First, the lion woke up

输出是:

Allocated initial parallel arrays of size 8.
Got [the]
Got [lion]
Got [s]
Got [rock.]
Got [First]
Got [the]
Got [lion]
Got [woke]
Got [up]
All done (successfully read 9 words; 7 unique words).
All words (and corresponding counts) are:
the -- 2
lion -- 2
s -- 1
rock -- 1
First -- 1
woke -- 1
up -- 1

当代码在您的文本上运行时,包括双引号,如下所示:

$ echo '"Pardon, O King,"' | cw37 /dev/stdin
Allocated initial parallel arrays of size 8.
Got ["Pardon]
Got [O]
Got [King]
Got ["]
All done (successfully read 3 words; 3 unique words).
All words (and corresponding counts) are:
Pardon -- 1
O -- 1
King -- 1
$

对代码进行了一些修改。如果没有字母字符,您的代码仍然会计算它(因为 strip_word() 中的细微问题)。这需要通过更仔细地检查 strip_word() 来处理;你测试 if (string == '\0') 检查(迟来的)内存是否被分配到你需要的地方 if (string[0] == '\0') 测试字符串是否为空。

请注意,如果一行中有两个逗号,或者一个撇号后跟一个逗号(尽管它处理逗号后跟一个撇号),read_word() 中的代码将被混淆为报告 EOF好的)。固定那个比较麻烦;您最好使用带有 getc() 的循环来读取字符串。您甚至可以使用该循环去除非字母字符,而无需单独的 strip_word() 函数。

我假设您还没有学习结构。如果你有覆盖结构,你会使用结构的数组,例如 struct Word { char *word;整数计数; }; 并一次分配内存,而不是需要两个并行数组。

关于即使我处理了该字符,也无法在解析它时消除数组中的一个字符,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/41968845/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com