c - 查找字符串中每个字母的频率并将其添加到数组中

转载作者：行者123 更新时间：2023-11-30 19:13:59

我编写了这个程序，它打开一个文本文件，其中写入了一些内容，将其中的每个单词添加到字符串中，查找每个单词的频率，从而能够搜索以某个字母开头或结尾的单词，将每个单词的第一个和最后一个字母大写并将它们打印到另一个文本文件(例如字典)中。

我只想让它再做一件事，做完这个程序就算完成了，但我不知道该怎么做！我希望它统计字母表中每个字母出现的频率，将它们从大到小排序，然后当用户输入一个字母时，打印出该字母的频率！

我尝试了一些方法，如下所示，但似乎不起作用!

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 100

/*
 * Word and letter statistics for "text.txt".
 *
 * Steps, in order:
 *   1. Read the whole file into one heap buffer.
 *   2. Split the buffer into words (at most N) and echo each word.
 *   3. Print how many times each word occurs.
 *   4. List the words that start / end with a letter typed by the user.
 *   5. Count every letter a-z (case-insensitively), sort the counts from
 *      largest to smallest, and print the count of a letter typed by the user.
 *   6. Sort the words alphabetically, upper-case the first and last letter
 *      of each one, and write them to "output.txt", one per line.
 *
 * Returns 0 on success; exits with status 1 when a file cannot be opened or
 * memory cannot be allocated.
 */
int main(){
    const char *delims = " \"\n\t\r,.-;!";
    FILE *fp1, *fp2;
    char buffer[100];
    char *text = NULL;          /* whole file; every entry of words[] points into it */
    char *grown;
    char *word;
    char *words[N];
    char *swap;
    char *p;
    char ch1, ch2, ch3;
    char alphabet[26];
    char temp2;
    int i, y, c;
    int word_number = 0;
    int freq[N];
    int letter_count[26] = {0};
    int temp1;
    int position = -1;          /* index of the requested letter after sorting */
    size_t text_len = 0;
    size_t chunk_len;
    size_t last_letter;

    fp1 = fopen("text.txt", "r");
    if(fp1==NULL){
        perror("text.txt");
        exit(1);
    }
    fp2 = fopen("output.txt", "w");
    if(fp2==NULL){
        perror("output.txt");
        fclose(fp1);
        exit(1);
    }

    /* Append each chunk to text; realloc(NULL, n) acts like malloc(n). */
    while(fgets(buffer, sizeof buffer, fp1)!=NULL){
        chunk_len = strlen(buffer);
        grown = realloc(text, text_len + chunk_len + 1);
        if(grown==NULL){
            /* realloc left the old buffer intact, so it still has to be freed */
            free(text);
            fclose(fp1);
            fclose(fp2);
            exit(1);
        }
        text = grown;
        memcpy(text + text_len, buffer, chunk_len + 1);
        text_len += chunk_len;
    }

    /*
     * strtok terminates each token in place, so storing the returned pointer
     * is enough; no per-word buffer is needed. An empty file leaves text NULL,
     * which must not be handed to strtok.
     */
    word = (text!=NULL) ? strtok(text, delims) : NULL;
    while(word!=NULL && word_number<N){
        words[word_number++] = word;
        printf("%s\n", word);
        word = strtok(NULL, delims);
    }

    for(i=0;i<word_number;i++){   /* frequency of words */
        freq[i] = 0;
        for(y=0;y<word_number;y++){
            if(strcmp(words[i], words[y])==0){
                freq[i]++;
            }
        }
    }

    for(i=0;i<word_number;i++){
        printf("The word: %s ,appears %d times!\n", words[i], freq[i]);
    }

    /* The leading space in " %c" skips the newline left by earlier input. */
    printf("Search all words starting with the letter: ");
    if(scanf(" %c", &ch1)==1){
        for(i=0;i<word_number;i++){
            if(words[i][0]==ch1){
                printf("%s , ", words[i]);
            }
        }
        printf("\n");
    }

    printf("Search all words ending with the letter: ");
    if(scanf(" %c", &ch2)==1){
        for(i=0;i<word_number;i++){
            last_letter = strlen(words[i]);
            /* the last character sits at index length-1, not length */
            if(last_letter>0 && words[i][last_letter-1]==ch2){
                printf("%s , ", words[i]);
            }
        }
        printf("\n");
    }

    /* Count letters character by character, folding upper case into lower. */
    for(i=0;i<word_number;i++){
        for(p=words[i]; *p!='\0'; p++){
            c = tolower((unsigned char)*p);
            if(c>='a' && c<='z'){
                letter_count[c-'a']++;
            }
        }
    }

    for(c=0;c<26;c++){
        alphabet[c] = (char)('a'+c);
    }

    /* Sort counts from largest to smallest, keeping alphabet[] in step. */
    for(i=0;i<26;i++){
        for(y=i+1;y<26;y++){
            if(letter_count[i]<letter_count[y]){
                temp1 = letter_count[i];
                letter_count[i] = letter_count[y];
                letter_count[y] = temp1;

                temp2 = alphabet[i];
                alphabet[i] = alphabet[y];
                alphabet[y] = temp2;
            }
        }
    }

    printf("Type a letter to see its frequency: ");
    if(scanf(" %c", &ch3)==1){
        for(c=0;c<26;c++){
            if(alphabet[c]==tolower((unsigned char)ch3)){
                position = c;
            }
        }
        if(position>=0){
            printf("The letter '%c' appears %d times!\n", alphabet[position], letter_count[position]);
        }
        else{
            printf("'%c' is not a letter.\n", ch3);
        }
    }

    /* Sort alphabetically by swapping pointers; the words differ in length,
       so copying their characters over each other would overflow. */
    for(i=0;i<word_number-1;i++){
        for(y=i+1;y<word_number;y++){
            if(strcmp(words[i], words[y])>0){
                swap = words[i];
                words[i] = words[y];
                words[y] = swap;
            }
        }
    }

    for(i=0;i<word_number;i++){        /* printing words to second file */
        last_letter = strlen(words[i]);
        if(last_letter>0){
            words[i][0] = (char)toupper((unsigned char)words[i][0]);
            words[i][last_letter-1] = (char)toupper((unsigned char)words[i][last_letter-1]);
        }
        fprintf(fp2, "%s\n", words[i]);
    }

    free(text);
    fclose(fp1);
    fclose(fp2);
    return 0;
}

最佳答案

当做任何涉及计数和“简单”频率分析的事情时，我会想到基于直方图的方法。

您的做法看起来比实际需要的复杂得多。只需将下面示例中的 str 替换为指向您的字符串数据的指针，示例代码就会负责统计每个字符出现的次数。

您似乎使用了很多哨兵值，而常量或预处理器宏会更好。这可以改进。

最后，考虑将代码重构为更小的函数，如下面的示例所示。这样代码更易于阅读；而且如果您将来在 StackOverflow 上提问，还可以把能正常工作的代码与有问题的代码分开，从而发布更小的示例，并获得更多、更好的回复。

<小时/>

代码清单

<小时/>

/*******************************************************************************
 * Preprocessor directives
 ******************************************************************************/
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#define ALPHABET_SIZE   (26)

/*******************************************************************************
 * Abstract data types
 ******************************************************************************/
/* Letter histogram: one counter per letter plus a running total. */
struct histo_t {
    /* statistics[k] is the bin for the k-th letter, i.e. 'a' + k */
    int statistics[ALPHABET_SIZE];
    /* Running total, used as the denominator when scaling the bins */
    int sum;
};
typedef struct histo_t histo_t;

/*******************************************************************************
 * Function prototypes
 ******************************************************************************/
int CreateHistogram(const char* str, histo_t* pHist);
void PrintHistogram(const histo_t* pHist);

/*******************************************************************************
 * Function definitions
 ******************************************************************************/
/*----------------------------------------------------------------------------*/
/*
 * Demo driver: builds a letter histogram for a fixed sample sentence and
 * prints it.
 *
 * Returns 0 when the histogram was generated and printed, 1 when
 * CreateHistogram reported failure (nothing is printed in that case, so the
 * "Aborting." message is truthful).
 */
int main(void)
{
    char str[] = "Hello world. This is a test. ABCDEFGHIJKLMNOPQRSTUVWXYZ. abcdefghijklmnopqrstuvwxyz.\n";

    // Create histogram, initialize every bin and the total to zero
    histo_t myHistogram = { 0 };

    // Generate frequency statistics; stop here if that fails
    if ( CreateHistogram(str, &myHistogram) != 0 )
    {
        printf("Couldn't generate histogram. Aborting.\n");
        return 1;
    }
    printf("Successfully generated histogram.\n");

    // Print out results
    PrintHistogram(&myHistogram);

    return 0;
}

/*----------------------------------------------------------------------------*/
/*
 * Adds the characters of a NUL-terminated string to a histogram.
 *
 * Every letter increments the bin for its lower-case form; every character,
 * letter or not, increments pHist->sum. Existing counts in *pHist are added
 * to, not reset.
 *
 * str   - string to analyse; must not be NULL.
 * pHist - histogram to update; must not be NULL.
 *
 * Returns 0 on success, -1 (after printing "Invalid input.") when either
 * pointer is NULL.
 */
int CreateHistogram(const char* str, histo_t* pHist)
{
    if ( !str || !pHist )
    {
        printf("Invalid input.\n");
        return (-1);
    }

    const char* p;
    for ( p = str; *p != '\0'; p++ )
    {
        // <ctype.h> functions need a value representable as unsigned char;
        // a negative plain char would be undefined behaviour.
        unsigned char ch = (unsigned char)*p;
        if ( isalpha(ch) )
        {
            int idx = tolower(ch) - 'a';
            // Some locales classify characters outside a-z as letters;
            // those must not index past the end of the bins.
            if ( idx >= 0 && idx < ALPHABET_SIZE )
            {
                pHist->statistics[idx]++;
            }
        }
        pHist->sum++;
    }

    return 0;
}

/*----------------------------------------------------------------------------*/
/*
 * Prints one line per letter with its count and its share of pHist->sum as
 * a percentage, followed by the total.
 *
 * pHist - histogram to print. NULL prints "Invalid input." and returns.
 *         A histogram whose sum is 0 prints "Empty histogram." and returns,
 *         because the percentages would otherwise be computed as 0/0.
 */
void PrintHistogram(const histo_t* pHist)
{
    if ( !pHist )
    {
        printf("Invalid input.\n");
        return;
    }
    if ( pHist->sum == 0 )
    {
        printf("Empty histogram.\n");
        return;
    }

    // Print out results
    int i;
    for ( i = 0; i < ALPHABET_SIZE; i++ )
    {
        printf("%c - Count:%d - Frequency:%3.4lf%%\n",
                'a' + i, pHist->statistics[i], 100.0 * (double)pHist->statistics[i] / (double)pHist->sum);
    }
    printf("Total characters:%d\n", pHist->sum);
}

<小时/>

示例输出

<小时/>

Successfully generated histogram.
a - Count:3 - Frequency:3.5294%
b - Count:2 - Frequency:2.3529%
c - Count:2 - Frequency:2.3529%
d - Count:3 - Frequency:3.5294%
e - Count:4 - Frequency:4.7059%
f - Count:2 - Frequency:2.3529%
g - Count:2 - Frequency:2.3529%
h - Count:4 - Frequency:4.7059%
i - Count:4 - Frequency:4.7059%
j - Count:2 - Frequency:2.3529%
k - Count:2 - Frequency:2.3529%
l - Count:5 - Frequency:5.8824%
m - Count:2 - Frequency:2.3529%
n - Count:2 - Frequency:2.3529%
o - Count:4 - Frequency:4.7059%
p - Count:2 - Frequency:2.3529%
q - Count:2 - Frequency:2.3529%
r - Count:3 - Frequency:3.5294%
s - Count:5 - Frequency:5.8824%
t - Count:5 - Frequency:5.8824%
u - Count:2 - Frequency:2.3529%
v - Count:2 - Frequency:2.3529%
w - Count:3 - Frequency:3.5294%
x - Count:2 - Frequency:2.3529%
y - Count:2 - Frequency:2.3529%
z - Count:2 - Frequency:2.3529%
Total characters:85

<小时/>

编辑

根据评论中的反馈，您应该考虑在 isalpha() 代码块内添加第二个计数器。我的代码统计的是字符总数(空格和标点也计算在内)，而不是字母总数，因此这些频率的含义可能与您想要的不同。

关于c - 查找字符串中每个字母的频率并将其添加到数组中，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/34814573/

文章推荐： c# - 如何在运行时更改元素样式？

文章推荐： C++ 确保绝对没有标准输入缓冲

文章推荐：调用使用指针接收数组的函数

c++ int 数组，值为 2 维 int 数组(3d 数组)
我正在尝试创建一个包含 int[][] 项的数组即 int version0Indexes[][4] = { {1,2,3,4}, {5,6,7,8} }; int version1Indexes[
Java 数组[i]++ 与++数组[i]
我有一个整数数组: private int array[]; 如果我还有一个名为 add 的方法，那么以下有什么区别: public void add(int value) { array[va
JavaScript 数组 + 数组 = 字符串？
当您尝试在 JavaScript 中将一个数组添加到另一个数组时，它会将其转换为一个字符串。通常，当以另一种语言执行此操作时，列表会合并。 JavaScript [1, 2] + [3, 4] = "
数组
根据我正在阅读的教程，如果您想创建一个包含 5 列和 3 行的表格来表示这样的数据... 45 4 34 99 56 3 23 99 43 2 1 1 0 43 67 ...它说你可以使用下
数组
我通常使用 python 编写脚本/程序，但最近开始使用 JavaScript 进行编程，并且在使用数组时遇到了一些问题。在 python 中，当我创建一个数组并使用 for x in y 时，我得
数组 toString() 中的 javascript 数组
我有一个这样的数组: temp = [ 'data1', ['data1_a','data1_b'], ['data2_a','data2_b','data2_c'] ]; // 我想使用 toStr
php - 如何将秒表结果(数组)推送到第一个表结果(数组)
rent_property (table name) id fullName propertyName 1 A House Name1 2 B
C++ 数组 [索引] 与索引 [数组]
这个问题在这里已经有了答案: 关闭13年前。 Possible Duplicate: In C arrays why is this true? a[5] == 5[a] array[index] 和
excel - 将用户名(数组)与电子邮件(数组)匹配
使用 Excel 2013。经过多年的寻找和适应，我的第一篇文章。我正在尝试将当前 App 用户(即“John Smith”)与他的电子邮件地址“jsmith@work.com”进行匹配。使用两个
r - 3D 数组 -> 应用 -> 3D 数组
当仅在一个边距上操作时，apply 似乎不会重新组装 3D 数组。考虑: arr 1)，但对我来说仍然很奇怪，如果一个函数返回一个具有尺寸的对象，那么它们基本上会被忽略。最佳答案这是一个不太理
javascript - php 数组(数组)到 javascript
我有一个包含 GPS 坐标的 MySQL 数据库。这是我检索坐标的部分 PHP 代码； $sql = "SELECT lat, lon FROM gps_data"; $stmt=$db->query
python - 查找最后一个非零元素 3D 数组 - numpy 数组
我需要找到一种方法来执行这个操作，我有一个形状数组 [批量大小, 150, 1] 代表 batch_size 整数序列，每个序列有 150 个元素长，但在每个序列中都有很多添加的零，以使所有序列具有相
android - 如何在json中访问对象>数组>对象>数组>对象？
我必须通过 url 中的 json 获取文本。层次结构如下: 对象>数组>对象>数组>对象。我想用这段代码获取文本。但是我收到错误 :org.json.JSONException: No valu
cocoa - NSMutable NSArray 数组 - 如何避免所有这些行并使用维度或 3D 数组？
enter code here- (void)viewDidLoad { NSMutableArray *imageViewArray= [[NSMutableArray alloc] init];
java - 流式传输 2d 数组、修剪值并收集回 2d 数组
知道如何对二维字符串数组执行修剪操作，例如使用 Java 流 API 进行 3x3 并将其收集回相同维度的 3x3 数组？重点是避免使用显式的 for 循环。当前的解决方案只是简单地执行一个 fo
使用嵌套循环的 Java Union 数组 2 int 数组
已关闭。此问题需要 debugging details 。目前不接受答案。编辑问题以包含 desired behavior, a specific problem or error, and the
Jquery 与 JSON 数组 - 转换为 Javascript 数组
我有来自 ASP.NET Web 服务的以下 XML 输出: 1710 1711 1712 1713
javascript - 更新嵌套数组和对象中的对象。对象-->数组-->对象-->数组--> "object"
如果我有一个对象todo作为您状态的一部分，并且该对象包含数组列表，则列表内部有对象，在这些对象内部还有另一个数组listItems。如何更新数组 listItems 中 id 为“poi098”的对
c# - 如何在一个字节中转换 bool 数组，然后再转换回 bool 数组
我想将最大长度为 8 的 bool 数组打包成一个字节，通过网络发送它，然后将其解压回 bool 数组。已经在这里尝试了一些解决方案，但没有用。我正在使用单声道。我制作了 BitArray，然后尝试
c# - 将 char 数组/字符串转换为 bool 数组
我们的数据库中有这个字段指示一周中的每一天的真/假标志，如下所示:'1111110' 我需要将此值转换为 boolean 数组。为此，我编写了以下代码: char[] freqs = weekday

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

c - 查找字符串中每个字母的频率并将其添加到数组中

代码 list

示例输出

编辑