gpt4 book ai didi

c - 查找字符串中每个字母的频率并将其添加到数组中

转载 作者:行者123 更新时间:2023-11-30 19:13:59 24 4
gpt4 key购买 nike

我编写了这个程序,它打开一个文本文件,其中写入了一些内容,将其中的每个单词添加到字符串中,查找每个单词的频率,从而能够搜索以某个字母开头或结尾的单词,将每个单词的第一个和最后一个字母大写并将它们打印到另一个文本文件(例如字典)中。

我只想让它再做一件事,这样程序就算完成了,但我不知道该怎么做!我希望它统计字母表中每个字母出现的频率,将它们从大到小排序,然后当用户输入一个字母时,打印出该字母的频率!

我尝试了一些方法,如下所示,但似乎不起作用!

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 100

/*
 * Question listing (the asker reports that it does not work).
 *
 * Intended flow, as far as the statements below show: read text.txt into one
 * heap string, split it into words, count word frequencies, answer
 * "starts with"/"ends with" letter queries, build a per-letter frequency
 * table, sort the words and write them to output.txt.
 *
 * NOTE(review): the code is left exactly as posted. Comments marked
 * NOTE(review) point at statements that do not compile or that invoke
 * undefined behaviour.
 */
int main(){
FILE *fp1, *fp2;
char buffer[100];
// NOTE(review): text is never initialised; if the file is empty the loop
// below never assigns it and strtok() receives an indeterminate pointer.
char *text;
char *word;
// NOTE(review): array of N uninitialised pointers; no storage is ever
// allocated for the individual words.
char *words[N];
char temp[N];
char ch1,ch2,ch3;
char alphabet[26];
char temp2, temp3;
int i=0, y=0, c=0;;
int word_number=0;
int n=0;
int *freq;
int freq1=0;
int compare=0;
int last_letter=0;
int letter_count[256]={0};
int temp1=0;

fp1 = fopen("text.txt", "r");
// NOTE(review): fp2 is never checked for NULL before fprintf()/fclose().
fp2 = fopen("output.txt", "w");

if(fp1==NULL){exit(1);
}

// Concatenate every line of the input file into the heap string `text`.
while(fgets(buffer,100,fp1)!=NULL){
if(i==0){
text=(char*)malloc(strlen(buffer)+1);
strcpy(text, buffer);
}
else{
// NOTE(review): assigning realloc()'s result straight to text loses the
// original block on failure; the else branch then calls free(NULL) and the
// next iteration would strcat() into a null pointer.
text=(char*)realloc(text,n+1+strlen(buffer));
if(text!=NULL){
strcat(text,buffer);
}
else{
free(text);
}
}
n=n+1+strlen(buffer);
i++;
}
i=0;

// Tokenise the text in place on whitespace and punctuation.
word=strtok(text," \"\n\t\r,.-;!");
while(word!=NULL){
// NOTE(review): words[i] is uninitialised, so this strcpy() writes through
// an indeterminate pointer; i is also never bounded by N.
strcpy(words[i],word); i++; word_number++;
printf("%s\n",word);
word=strtok(NULL," \"\n\t\r,.-;!");
}

for(i=0;i<word_number;i++){ //frequency of words
y=0;
while(y<word_number){
compare=strcmp(words[i],words[y]);
if(compare==0){ freq1++;
}
compare=0;
y++;
}
// NOTE(review): freq is re-allocated on every outer iteration, leaking the
// previous block and discarding the counts stored so far.
freq=(int*)malloc(sizeof(int)*word_number);
// NOTE(review): strcpy() takes char pointers; freq[i] and freq1 are ints.
// A plain assignment appears to be what was meant.
strcpy(freq[i],freq1); freq1=0;
}

for(i=0;i<word_number;i++){
printf("The word: %s ,appears %d times!\n", words[i], freq[i]);
}

printf("Search all words starting with the letter: ");
scanf("%c", &ch1);
for(i=0;i<word_number;i++){
if(words[i][0]==ch1){
printf("%s , ", words[i]);
}

}

printf("Search all words ending with the letter: ");
// NOTE(review): "%c" does not skip whitespace, so this reads the newline
// left behind by the previous scanf() rather than a new letter.
scanf("%c", &ch2);
for(i=0;i<word_number;i++){
last_letter=strlen(words[i]);
// NOTE(review): index strlen() is the terminating '\0'; the last letter
// sits at last_letter-1.
if(words[i][last_letter]==ch2){
printf("%s , ", words[i]);
}
last_letter=0;
}

for(c=0;c<26;c++){ //here starts the part i need help with
// NOTE(review): words[c] is a char* (a whole word), not a character, so
// these comparisons and the subtraction below operate on pointer values;
// no character of the text is ever inspected. The loop also visits only
// indices 0..25 regardless of word_number.
if(words[c]>='a' && words[c]<='z'){
letter_count[words[c]-'a']++;
}
}

// alphabet[k] starts out as the k-th lowercase letter.
for(c=0;c<26;c++){
alphabet[c]=c+'a';
}

// Exchange sort over the 26 counters, moving alphabet[] in step.
for(i=0;i<26;i++){
for(y=i+1;y<26;y++){
// NOTE(review): swapping when [i] > [y] yields ascending order; the
// question text asks for largest-to-smallest.
if(letter_count[i]>letter_count[y]){
temp1=letter_count[i];
letter_count[i]=letter_count[y];
// NOTE(review): `temp` is the char array declared above; the saved value
// is in temp1.
letter_count[y]=temp;

temp2=alphabet[i];
alphabet[i]=alphabet[y];
alphabet[y]=temp2;
}
}
}

printf("Type a letter to see its frequency: ");
// NOTE(review): same "%c" issue as above - a pending newline is consumed.
scanf("%c", &ch3);
for(c=0;c<26;c++){
if(alphabet[c]==ch3){
temp3=c;
}
}
// NOTE(review): temp3 stays uninitialised when ch3 matches no entry.
printf("The letter '%c' appears %d times!", alphabet[temp3], letter_count[temp3]); //the part ends here

for(i=0; i<word_number-1;i++){ //sorting words alphabetically
for(y=i+1;y<word_number;y++){
if(strcmp(words[i], words[y])>0){
// NOTE(review): swaps word contents by copying through temp[N]; words of
// different lengths overwrite each other's storage.
strcpy(temp,words[i]);
strcpy(words[i],words[y]);
strcpy(words[y],temp);
}
}
}

for(i=0;i<word_number;i++){ //printing words to second file
// NOTE(review): toUpper is not declared anywhere in this listing; the
// standard function is toupper() from <ctype.h>, which is not included.
words[i][0]=toUpper(words[i][0]);
last_letter=strlen(words[i]);
// NOTE(review): index strlen() again addresses the '\0' terminator.
words[i][last_letter]=toUpper(words[i][last_letter]);
last_letter=0;
fprintf(fp2,"%s\n", words[i]);
}

// NOTE(review): text and freq are never freed.
fclose(fp1);
fclose(fp2);
return 0;
}

最佳答案

当做任何涉及计数和“简单”频率分析的事情时,我会想到基于直方图的方法。

这看起来比应有的麻烦得多。只需将 str 替换为指向字符串数据的指针,这将负责计算字符实例。

您似乎使用了很多哨兵值,而常量或预处理器宏会更好。这可以改进。

最后,考虑将代码重构为更小的函数,如下面的示例所示。它使阅读变得更容易,并且如果您将来在 StackOverflow 上发布问题,它可以让您将工作代码与非工作代码分开,这样您就可以发布更小的示例,并获得更多/更好的回复。

<hr/>

代码清单

<hr/>
/*******************************************************************************
* Preprocessor directives
******************************************************************************/
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#define ALPHABET_SIZE (26)

/*******************************************************************************
* Abstract data types
******************************************************************************/
/* Letter histogram: one counter per letter plus a running total. */
struct histo_t {
    /* Bin k holds the number of occurrences tallied for letter 'a' + k. */
    int statistics[ALPHABET_SIZE];
    /* Running total, used as the divisor when scaling bins to percentages. */
    int sum;
};

typedef struct histo_t histo_t;

/*******************************************************************************
* Function prototypes
******************************************************************************/
/* Tallies the letters of str, case-insensitively, into pHist.
 * Returns 0 on success, or -1 when either pointer is NULL. */
int CreateHistogram(const char* str, histo_t* pHist);
/* Prints the per-letter counts and percentages held in pHist to stdout. */
void PrintHistogram(const histo_t* pHist);

/*******************************************************************************
* Function definitions
******************************************************************************/
/*----------------------------------------------------------------------------*/
/*
 * Demo driver: builds a letter histogram for a fixed sample sentence and
 * prints it.
 *
 * Returns 0 on success and 1 when the histogram could not be generated.
 */
int main(void)
{
    char str[] = "Hello world. This is a test. ABCDEFGHIJKLMNOPQRSTUVWXYZ. abcdefghijklmnopqrstuvwxyz.\n";

    // Create histogram, initialize to zero
    histo_t myHistogram = { 0 };

    // Generate frequency statistics
    if ( CreateHistogram(str, &myHistogram) != 0 )
    {
        // The message announces an abort, so stop here instead of going on
        // to print a histogram that was never filled in.
        printf("Couldn't generate histogram. Aborting.\n");
        return 1;
    }
    printf("Successfully generated histogram.\n");

    // Print out results
    PrintHistogram(&myHistogram);

    return 0;
}

/*----------------------------------------------------------------------------*/
/*
 * Accumulates letter statistics for a NUL-terminated string into *pHist.
 *
 * str    String to scan; letters are folded to lower case before counting.
 * pHist  Histogram to update. Counts are added to whatever it already
 *        holds, so the caller zero-initialises it for a fresh tally.
 *
 * pHist->sum is incremented once for every character scanned (letters and
 * non-letters alike), so it is the total character count.
 *
 * Returns 0 on success, -1 (after printing "Invalid input.") when either
 * argument is NULL.
 */
int CreateHistogram(const char* str, histo_t* pHist)
{
    if ( !str || !pHist )
    {
        printf("Invalid input.\n");
        return (-1);
    }

    // Walk to the terminator directly instead of re-running strlen() on
    // every iteration.
    const char* p;
    for ( p = str; *p != '\0'; p++ )
    {
        // The <ctype.h> functions require a value representable as
        // unsigned char; a negative plain char would be undefined behaviour.
        const unsigned char ch = (unsigned char)*p;

        if ( isalpha(ch) )
        {
            // In a non-"C" locale isalpha() may accept letters outside
            // a-z; those must not index past the ALPHABET_SIZE bins.
            const int idx = tolower(ch) - 'a';
            if ( idx >= 0 && idx < ALPHABET_SIZE )
            {
                pHist->statistics[idx]++;
            }
        }
        pHist->sum++;
    }

    return 0;
}

/*----------------------------------------------------------------------------*/
/*
 * Prints the histogram to stdout: one line per letter with its count and
 * its share of pHist->sum as a percentage, followed by the total.
 *
 * Prints "Invalid input." and returns when pHist is NULL.
 * Prints "Empty histogram." and returns when pHist->sum is zero, because
 * the percentages would otherwise be computed as 0/0.
 */
void PrintHistogram(const histo_t* pHist)
{
    if ( !pHist )
    {
        printf("Invalid input.\n");
        return;
    }
    if ( pHist->sum == 0 )
    {
        printf("Empty histogram.\n");
        return;
    }

    // Convert the divisor once; it is the same for every bin.
    const double total = (double)pHist->sum;

    // Print out results
    int i;
    for ( i = 0; i < ALPHABET_SIZE; i++ )
    {
        printf("%c - Count:%d - Frequency:%3.4lf%%\n",
               'a' + i, pHist->statistics[i], 100.0 * (double)pHist->statistics[i] / total);
    }
    printf("Total characters:%d\n", pHist->sum);
}
<hr/>

示例输出

<hr/>
Successfully generated histogram.
a - Count:3 - Frequency:3.5294%
b - Count:2 - Frequency:2.3529%
c - Count:2 - Frequency:2.3529%
d - Count:3 - Frequency:3.5294%
e - Count:4 - Frequency:4.7059%
f - Count:2 - Frequency:2.3529%
g - Count:2 - Frequency:2.3529%
h - Count:4 - Frequency:4.7059%
i - Count:4 - Frequency:4.7059%
j - Count:2 - Frequency:2.3529%
k - Count:2 - Frequency:2.3529%
l - Count:5 - Frequency:5.8824%
m - Count:2 - Frequency:2.3529%
n - Count:2 - Frequency:2.3529%
o - Count:4 - Frequency:4.7059%
p - Count:2 - Frequency:2.3529%
q - Count:2 - Frequency:2.3529%
r - Count:3 - Frequency:3.5294%
s - Count:5 - Frequency:5.8824%
t - Count:5 - Frequency:5.8824%
u - Count:2 - Frequency:2.3529%
v - Count:2 - Frequency:2.3529%
w - Count:3 - Frequency:3.5294%
x - Count:2 - Frequency:2.3529%
y - Count:2 - Frequency:2.3529%
z - Count:2 - Frequency:2.3529%
Total characters:85
<hr/>

编辑

根据大家的评论,您应该考虑在 isalpha() 代码块内添加第二个计数器。我的代码统计的是 ASCII 字符总数,而不是字母总数,因此统计结果的含义可能与您想要的不同。

关于c - 查找字符串中每个字母的频率并将其添加到数组中,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/34814573/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com