C程序: Parent read one file and child count the number of words.子程序有4个线程并且还使用了mapper和reducer-6ren

C程序: Parent read one file and child count the number of words.子程序有4个线程并且还使用了mapper和reducer

转载作者：行者123 更新时间：2023-11-30 16:56:48

我正在编写一个 C 程序，其中有一个 child 和一个 parent 。父级和子级使用共享内存共享数据。我正在做的是要求父进程将文件写入共享内存，子进程然后从共享内存中读取文件并输出一个列表，显示每个唯一单词的计数。

我要做的是在子进程中使用 4 个线程，并使用 mapper 和 reducer 来完成任务。

文本文件大约有 30000 行。如果我只传入文本文件的前 20000 行，程序可以正确运行；但处理整个文件时，程序就无法正常运行。

如果有人可以看看我的程序并让我知道哪里出了问题，我将不胜感激。

以下是文本文件的链接:http://cis-linux1.temple.edu/~qzeng/cis5512-fall2016/papers/ANNA_KARENINA.txt

这是我尝试运行的代码:

#include <ctype.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/wait.h>

#define NUM_THREADS     4

/*
 * Limits of the per-thread word tables.
 *
 * WORD_BUF_SIZE and COUNT_BUF_SIZE are the buffer sizes the mapper has always
 * allocated for each stored string; they are kept so that a consumer copying
 * the strings into equally sized buffers stays in bounds.
 */
enum {
    MAX_WORDS_PER_THREAD = 32768,   /* distinct words one thread can report   */
    WORD_BUF_SIZE        = 30,      /* bytes per stored word, including NUL   */
    COUNT_BUF_SIZE       = 8,       /* bytes per count string, including NUL  */
    COUNT_LIMIT          = 9999999  /* largest count that fits COUNT_BUF_SIZE */
};

static key_t key = (key_t) 0;   /* System V IPC key of the shared segment */
static int size = 0;            /* length in bytes of the shared text     */

/* Input and output of one CountWords() worker. */
struct thread_data
{
    int thread_id;                          /* index of the worker              */
    char *msg;                              /* in: NUL-terminated text to scan  */
    char *wordary[MAX_WORDS_PER_THREAD][2]; /* out: [i][0] = lower-cased word,  */
                                            /*      [i][1] = decimal count;     */
                                            /*      both are malloc'ed strings  */
    int size;                               /* out: number of valid rows        */
};

struct thread_data thread_data_array[NUM_THREADS];

/* Return the row whose word equals `word`, or -1 if it is not in the table. */
static int find_word(char *table[][2], int used, const char *word)
{
    int i;

    for (i = 0; i < used; i++) {
        if (strcmp(table[i][0], word) == 0)
            return i;
    }
    return -1;
}

/*
 * Allocate row `slot` of data->wordary and store `word` in it.
 * Returns 1 on success, 0 when the table is full or memory is exhausted
 * (in which case the row is left untouched).
 */
static int add_word(struct thread_data *data, int slot, const char *word)
{
    char *text;
    char *count;

    if (slot >= MAX_WORDS_PER_THREAD)
        return 0;

    text = malloc(WORD_BUF_SIZE);
    count = malloc(COUNT_BUF_SIZE);
    if (text == NULL || count == NULL) {
        free(text);
        free(count);
        return 0;
    }

    snprintf(text, WORD_BUF_SIZE, "%s", word);
    count[0] = '\0';
    data->wordary[slot][0] = text;
    data->wordary[slot][1] = count;
    return 1;
}

/*
 * Mapper: count how often each whitespace-separated word occurs in
 * my_data->msg.
 *
 * threadarg  pointer to a struct thread_data; `msg` must be a NUL-terminated
 *            string (or NULL, which yields an empty result). The text is only
 *            read, never modified.
 *
 * On return my_data->wordary[0 .. size-1] holds one row per distinct word in
 * order of first appearance: [i][0] is the lower-cased word and [i][1] its
 * count as a decimal string. Both strings are heap-allocated and become the
 * caller's property. The number of distinct words is also printed on stdout.
 *
 * Any whitespace character separates words and empty tokens are ignored.
 * Words longer than WORD_BUF_SIZE - 1 characters are truncated, counts
 * saturate at COUNT_LIMIT, and words that no longer fit into the table are
 * dropped with a diagnostic on stderr instead of overflowing it.
 *
 * Returns threadarg. Returning from a thread start routine hands the value to
 * pthread_join() exactly like pthread_exit() would, and additionally lets the
 * function be called directly.
 */
void *CountWords(void *threadarg)
{
    struct thread_data *my_data = threadarg;
    const char *cursor;
    char word[WORD_BUF_SIZE];
    size_t len = 0;
    int *counts;
    int unique = 0;
    long dropped = 0;
    int i;

    my_data->size = 0;
    if (my_data->msg == NULL)
        return my_data;

    /* Counts are kept as integers while scanning and formatted once at the end. */
    counts = calloc(MAX_WORDS_PER_THREAD, sizeof *counts);
    if (counts == NULL) {
        fprintf(stderr, "CountWords: thread %d: out of memory\n",
                my_data->thread_id);
        return my_data;
    }

    for (cursor = my_data->msg; ; cursor++) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        unsigned char c = (unsigned char)*cursor;

        if (c != '\0' && !isspace(c)) {
            if (len < sizeof word - 1)      /* truncate over-long tokens */
                word[len++] = (char)tolower(c);
            continue;
        }

        /* A separator or the end of the text terminates the current word. */
        if (len > 0) {
            int slot;

            word[len] = '\0';
            len = 0;

            slot = find_word(my_data->wordary, unique, word);
            if (slot >= 0) {
                if (counts[slot] < COUNT_LIMIT)
                    counts[slot]++;
            } else if (add_word(my_data, unique, word)) {
                counts[unique++] = 1;
            } else {
                dropped++;
            }
        }

        if (c == '\0')
            break;
    }

    for (i = 0; i < unique; i++)
        snprintf(my_data->wordary[i][1], COUNT_BUF_SIZE, "%d", counts[i]);
    free(counts);

    if (dropped > 0) {
        fprintf(stderr,
                "CountWords: thread %d: %ld word occurrences dropped "
                "(table full or out of memory)\n",
                my_data->thread_id, dropped);
    }

    printf("%d\n", unique);
    my_data->size = unique;

    return my_data;
}

void  main()
{
 int    ShmID, index = 0;
 char* ShmPTR;
 pid_t  pid;
 int    status;
 clock_t begin, end;
 double time_spent;
 begin = clock();
 FILE *txtfile, *out_file;
 txtfile = fopen("test.txt", "r");

 fseek(txtfile, 0, SEEK_END); // seek to end of file
 size = ftell(txtfile); // get current file pointer
 fseek(txtfile, 0, SEEK_SET);
 //printf("size : %d", size);

 key = ftok(__FILE__,'x');
 ShmID = shmget(key, size, IPC_CREAT | 0666);
 if (ShmID < 0) {
      printf("*** shmget error (server) ***\n");
      exit(1);
 }
 printf("Server has received a shared memory\n");

 ShmPTR = (char *) shmat(ShmID, NULL, 0);
 if (ShmPTR == (char *)(-1)) {
      printf("*** shmat error (server) ***\n");
      exit(1);
 }
 printf("Server has attached the shared memory...\n");

 while(!feof(txtfile))
 {
    ShmPTR[index] = fgetc(txtfile);
    index++;    
 }

 //ShmPTR[index] = '\0';

 printf("Server is about to fork a child process...\n");
 pid = fork();
 if (pid < 0) 
 {
      printf("*** fork error (server) ***\n");
      exit(1);
 }
 else if (pid == 0) 
 {
    printf("   Client process started\n");
    //printf("%s",shm);
    pthread_t threads[NUM_THREADS];
    pthread_attr_t attr;
    int rc, t, shmsz1, shmsz2, shmsz3;
    char* split_ShmPTR[4]; 

    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    //printf("1111");
    //printf("%d\n",size);
    shmsz1 = (int)(size/4);
    shmsz2 = shmsz1*2;
    shmsz3 = shmsz1*3;

 // printf("%d  %d  %d\n", shmsz1, shmsz2, shmsz3);

    //printf("%c\n",ShmPTR[87]);

    while(ShmPTR[shmsz1] != ' ')
    { 
        shmsz1++;
    }

    //printf("%d  \n", shmsz1);
    //printf("%c1\n",ShmPTR[shmsz1]);
    split_ShmPTR[0] = (char*)malloc(shmsz1 + 1000);
    strncpy(split_ShmPTR[0],ShmPTR,shmsz1);


    while(ShmPTR[shmsz2] != ' ')
    { 
        shmsz2++;
    }

    split_ShmPTR[1] = (char*)malloc(shmsz2-shmsz1 + 1000);
    strncpy(split_ShmPTR[1],ShmPTR + shmsz1,shmsz2-shmsz1);


    while(ShmPTR[shmsz3] != ' ')
    { 
        shmsz3++;
    }

    split_ShmPTR[2] = (char*)malloc(shmsz3-shmsz2 + 1000);
    strncpy(split_ShmPTR[2],ShmPTR + shmsz2,shmsz3-shmsz2);


    split_ShmPTR[3] = (char*)malloc(size-shmsz3 + 10);
    strncpy(split_ShmPTR[3],ShmPTR + shmsz3,size-shmsz3);
//printf("%s\n",split_ShmPTR[3]); 

struct thread_data *my_words;
char* word_array_final[30000][2];
word_array_final[0][0] = (char*)malloc(30);
word_array_final[0][1] = (char*)malloc(8);
int q, r, flag1 = 0,  count, idx = 0, z;
char buff[8];

    for(t = 0; t<NUM_THREADS; t++)
    {
        thread_data_array[t].thread_id = t;
        thread_data_array[t].msg = split_ShmPTR[t];

        rc = pthread_create(&threads[t], NULL, CountWords, (void *) &thread_data_array[t]);
        if (rc)
        {
            printf("ERROR; return code from pthread_create() is %d\n", rc);
            exit(-1);
        }
        //pthread_join(threads[t],(void*)&my_words);
        //printf("%d  %s\n", my_words->thread_id, my_words->wordary[0][0]);
    }
    //pthread_exit(NULL);
    //printf("%s\n", thread_data_array[3].msg);
    pthread_attr_destroy(&attr);
    for(t = 0; t<NUM_THREADS; t++)
    {
        pthread_join(threads[t],(void*)&my_words);
        //printf("%d  %s\n", my_words->thread_id, my_words->wordary[1][0]);
        //printf("%d thread\n", t);
        //printf("%d",my_words->size); 
        if(t == 0)
        {
        //printf("%d  %s\n", my_words->thread_id, my_words->wordary[1][0]);
            for(q = 0; q < my_words->size; q++)
            {
                strcpy(word_array_final[idx][0], my_words->wordary[q][0]);
                strcpy(word_array_final[idx][1], my_words->wordary[q][1]);
                idx++;
                word_array_final[idx][0] = (char*)malloc(30);
            word_array_final[idx][1] = (char*)malloc(8);
            //printf("%s   %s\n", word_array_final[idx][0], word_array_final[idx][1]);
            }
        }
        else
        {
        //printf("%d  %s  %d\n", my_words->thread_id, my_words->wordary[1][0], my_words->size);
            for(q = 0; q<my_words->size; q++)
            {   
                flag1 = 0;
                for(r = 0; r<idx; r++)
                {
                    if(0 == (strcmp(word_array_final[r][0],my_words->wordary[q][0])))
                    {
                        flag1 = 1;
                    count = atoi(my_words->wordary[q][1]) + atoi(word_array_final[r][1]);
                    sprintf(buff, "%d", count);
                    strcpy(word_array_final[r][1],buff);
                    }
                    //printf("%s   %s1\n", word_array_final[idx][0], word_array_final[idx][1]); 
                }

                if(flag1 == 0)
            {
                strcpy(word_array_final[idx][0],my_words->wordary[q][0]);
                strcpy(word_array_final[idx][1],my_words->wordary[q][1]);
                idx++;
                word_array_final[idx][0]=(char*)malloc(30);
                word_array_final[idx][1]=(char*)malloc(8);                  
            }
            }
        }

    }

    out_file=fopen("output.txt","w");

    for(z=0; z<idx; z++)
{
    fprintf(out_file, "%s : %s\n", word_array_final[z][1], word_array_final[z][0]);
}

printf("done");
fclose(out_file);

    //pthread_exit(NULL);

    printf("   Client is about to exit\n");
    exit(0);
 }         
 wait(&status);
 printf("Server has detected the completion of its child...\n");
 shmdt((void *) ShmPTR);
 printf("Server has detached its shared memory...\n");
 shmctl(ShmID, IPC_RMID, NULL);
 printf("Server has removed its shared memory...\n");
 printf("Server exits...\n");
 end = clock();
 time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
 printf("Time spent: %lf\n", time_spent);
 exit(0);
 }

请帮助我，任何帮助都将不胜感激。

最佳答案

我找到了解决方案……这是一个愚蠢的错误：我的数组开得太小了。现在已经修复，这个程序可以作为父子进程通过共享内存协作、并带有 mapper 和 reducer 的示例程序使用。

关于C程序: Parent read one file and child count the number of words.子程序有4个线程并且还使用了mapper和reducer，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/39809462/

文章推荐： c - 将文件中的值存储到数组

文章推荐：文本字段中的 JavaScript 时间且不更新

文章推荐： c# - 使用集群的 Quartz.net 调度器

一文搞清楚MySQL count(*)、count(1)、count(col)区别
目录 count作用测试 count(*) count(1) count(col) count(id):统计id count(inde
MySQL count(1)、count(*)、count(字段)的区别
目录 1.初识COUNT 2.COUNT(字段)、COUNT(常量)和COUNT(*)之间的区别 3.COUNT(*)的优化 MyIS
sql - SQL Server 2008 中 select count(*)、count(0)、count(100)、count(Id) 之间的区别？
以下 SQL Server 2008 语句之间有什么区别？ SELECT COUNT(*) FROM dbo.Regular_Report SELECT COUNT(0) FROM dbo.Regul
python - 为什么是 str.count ('' ) ≠ (from str.count ('A' ) + str.count ('B' ) + ... + str.count ('Z' ))
如果字符串(短语)中只有元音，它(对我而言)说True；否则说 False。我不明白为什么它总是返回 False，因为 (x >= x) 总是返回 True。我感谢任何人检查此查询的解决方案。 (st
【MySQL】MySQL count(*) count(1) 实现方式以及各种 count 对比
1.概述在这个文章之前，我一直用count(1) 查询所有数据，以前我们都是说 count(*) 是最慢的。但是这个博客恰恰相反。对于 count(主键 id) 来说，InnoDB 引擎会遍历整张
sql - COUNT(*) 与 COUNT(1) 与 COUNT(pk) : which is better?
这个问题已经有答案了: Count(*) vs Count(1) - SQL Server (13 个回答) 已关闭 8 年前。我经常发现这三种变体: SELECT COUNT(*) FROM Fo
sql - 为什么 count(1)、count(column) 和 count(*) 的成本相同？
为什么三个查询的成本相同？我想至少应该有一个更快。否则，只使用关键字 COUNT() 而不是 COUNT(parameter) 就可以了。例如，以下是不依赖于参数的 COUNT() 示例实现: wh
mysql - 如何在连接表上查询 COUNT 并返回 count=0 和 count>0 的记录
我有一个“产品”表和一个“评论”表。我想编写一个查询来返回每个产品的评论的 COUNT 和 AVG。并且如果没有评论，我希望它为 COUNT 和 AVG 返回 0/null。产品表 +-----
iOS 应用崩溃 -[NSCFString count] : when trying to get count of NSMutableArray if count is only 1
我会保持简短和亲切，因为我确信我缺少的是一些简单的东西。我正在尝试获取一个 NSMutableArray 的计数，它可以包含可变数量的对象(id 号)。数组是从 JSon 数据创建的，数组本身是完美创
Splunk Query Count of Count
我想知道查询计数的计数。查询是 sourcetype="cargo_dc_shipping_log" OR sourcetype="cargo_dc_deliver_log" | stats cou
count - sqlalchemy COUNT 和 IF
任何人都知道我如何在 SQL 炼金术中进行计数 COUN(IF(table_row = 1 AND table_row2 =2),1,0) 我做了这样的东西， func.COUNT(func.IF((
MySQL COUNT where 和 COUNT all
我有一个有四列的表(销售)； id, user_id, product_id, and date_added. 我需要统计某个用户已售出的具有特定 id 的产品数量，并获取该用户当月售出的产品总数。
MySQL count of count，将一个表的结果与另一个表一起使用
我是来问这个问题的实现的 MYSQL count of count? 我的问题是将我从一个表中提取结果的结果联系起来，使用它们来查询同一数据库的另一个表 (抱歉，我不是强大的 xySQL)。我有一个
MySQL COUNT(*) GROUP BY HAVING COUNT=?
这是我的查询 SELECT COUNT(*) as total, toys, date FROM T1 WHERE (date >= '2012-06-26'AND date '0') UNION
mysql - COUNT 和同一查询中的子 COUNT
我有 2 个表:成员，订单。 Members: MemberID, DateCreated Orders: OrderID, DateCreated, MemberID 我想找出给定月份中新成员的数
mysql - mySQL/SQL 中的 count(0)、count(1).. 和 count(*) 有什么区别？
我最近在一次采访中被问到这个问题。我在 mySQL 中尝试了这个，并得到了相同的结果(最终结果)。All 给出了该特定表中的行数。谁能解释它们之间的主要区别。最佳答案没什么，除非您在表格中指定字段
count - 桌面 : Count Distinct returns higher value than Count (if connected to Google Big Query)
我有一个包含 2157 条记录的表，假设有 3 列(A、B、C)，我知道在 A 列中有 2154 个不同的值。使用连接到 BigQuery 的 Tableau Desktop(及其自身的功能)，我得
sql - 每辆车相对有 Count(For that Day), Count for last 10 days 和 Count of last 20 days
我试图查看当天的车辆销量，并创建另外两个列来告诉我过去 10 天的销量和过去 20 天的销量。同一天和同一辆车可能有多个销售。我的目标是获取不同的车辆和日期并查看他们的销售数量。 N 天计数应与该行中
MySQL SELECT 1 vs SELECT `field_id` AND COUNT 1 vs COUNT (*) or COUNT (`field_id` ) 性能明智
我有一个非常简单的问题。我想知道某个数据库行是否存在。我通常使用: SELECT 1 FROM `my_table` WHERE `field_x` = 'something' 然后我获取结果: $
java - 一个循环如何由两个线程完成？说由 ist 线程从 count=1 循环到 count=4 并由第二个线程循环 count =5 到 8？
我想要的输出的描述:我想要两个线程 Gaurav 和 john 完成一个 while 循环(从 1 到 8)，这样无论哪个线程启动 ist，都会运行 5 次迭代(即直到 count=5 ) ，然后进入

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

C程序: Parent read one file and child count the number of words.子程序有4个线程并且还使用了mapper和reducer