gpt4 book ai didi

c - 多线程对速度没有改进 - 在 C 中使用 pthread - 为什么?

转载 作者:太空宇宙 更新时间:2023-11-04 05:50:09 25 4
gpt4 key购买 nike

为了更加熟悉多线程,我编写了一个带有“密集型”计算的小 C 程序。它生成一幅 Mandelbrot 集的图片,其中每个像素单独计算,然后按行缓冲。每个线程分到相等份额的行数。例如,如果选择两个线程,那么高度为 1000 行的图片应被分成两个各 500 行的块。因此我预期运行时间会减少一半,但实际上没有任何改善。为什么?我不明白,因为程序运行正常,逻辑上看起来也没有问题。如果有人可以给我提示,我将不胜感激。下面您可以看到 main 函数和一个由 main 调用的用于计算 Mandelbrot 集的函数。

/*
 * Renders a Mandelbrot image as a 24-bit BMP on stdout. The image rows are
 * split into contiguous chunks, one per worker thread (myDo2), and the time
 * spent creating, running and joining the workers is reported on stderr.
 *
 *   argv[1]  image width in pixels
 *   argv[2]  image height in pixels
 *   argv[3]  number of worker threads
 *
 * Returns 0 on success, 1 on invalid arguments or when a thread cannot be
 * created, 2 when a thread cannot be joined.
 */
int main(int argc, char ** argv, char ** envp) {
    (void)envp;

    if (argc != 4)
    {
        printf("Bitte genau 3 Argumente eingeben.\n");
        return 1;
    }
    // Timestamps for the stopwatch
    struct timeval start, ende;
    long ttlende, ttlstart;

    width = str2num(argv[1]);
    height = str2num(argv[2]);
    int threads = str2num(argv[3]);

    // A zero thread count (or zero rows per thread) would divide by zero below.
    if (width < 1 || height < 1 || threads < 1)
    {
        fprintf(stderr, "Breite, Hoehe und Threadanzahl muessen groesser als 0 sein.\n");
        return 1;
    }
    // Never start more workers than there are rows to hand out.
    if (threads > height) threads = height;

    int y;

    unsigned char info[BMPHEADER_SIZE] = {
        //size
        'B','M', 0,0,0,0, 0,0, 0,0, 54,0,0,0,
        //width //height
        40,0,0,0, 0,0,0,0, 0,0,0,0, 1,0, 24,0,
        // datasize
        0,0,0,0, 0,0,0,0
    };

    // BMP lines must be of lengths divisible by 4
    char span[4] = "\0\0\0\0";
    int spanBytes = 4 - ((width * 3) % 4);
    if (spanBytes == 4) spanBytes = 0;
    int psize = ((width * 3) + spanBytes) * height;

    // Patch the header fields with memcpy: the offsets (2, 18, 22, 34) are not
    // int-aligned, so storing through a casted int pointer is not portable.
    int fileSize = BMPHEADER_SIZE + psize;
    int bmpWidth = width;
    int bmpHeight = height;
    memcpy(&info[2], &fileSize, sizeof fileSize);
    memcpy(&info[18], &bmpWidth, sizeof bmpWidth);
    memcpy(&info[22], &bmpHeight, sizeof bmpHeight);
    memcpy(&info[34], &psize, sizeof psize);

    write(1, (char *) info, BMPHEADER_SIZE);

    //create chunks: exactly one per thread; the last chunk takes the remainder
    int i;
    int rounds = threads;
    int blocksize = height / rounds;
    int reminder = height % rounds;
    int begin = 1;

    //init structs
    threadinfo *tinfoptr = getptr(rounds);
    if (tinfoptr == NULL)
    {
        fprintf(stderr, "Error allocating thread info\n");
        return 1;
    }
    for (i = 1; i <= rounds; ++i) {
        threadinfo *ti = tinfoptr + (i - 1);
        int res = blocksize * i;
        if (i == rounds) {
            res = res + reminder;
        }

        //update parameters of tinfo
        ti->from = begin;
        ti->to = res;
        ti->span = span;
        ti->spanBytes = spanBytes;
        ti->width = width;
        ti->height = res - begin + 1;
        ti->results = NULL;
        ti->threadno = i;
        ti->blocksizeperthread = -1;
        // The next chunk starts one row after this one ends, so that no row
        // is computed (and later written) twice.
        begin = res + 1;
    }

    fprintf(stderr,"inti abgeschlossen, starte threads\n");

    pthread_t myThread[rounds];

    // Start the stopwatch before the first worker exists; the workers begin
    // computing as soon as they are created.
    gettimeofday(&start, NULL);
    for (i = 1; i <= rounds; ++i) {
        fprintf(stderr,"Rufe Thread %d auf\n",i);
        if (pthread_create(&myThread[i-1], NULL, myDo2, (void*)(tinfoptr + (i-1))) ) {
            fprintf(stderr, "Error creating thread\n");
            return 1;
        }
    }

    for (i = 1; i <= rounds; ++i) {
        /* wait for worker i to finish */
        if (pthread_join(myThread[i-1], NULL)) {
            fprintf(stderr, "Error joining thread\n");
            return 2;
        }
    }
    // Stop the stopwatch
    gettimeofday(&ende,NULL);

    // All workers are done: every tinfo entry now holds the rows of its chunk.
    for (i = 1; i <= rounds; i++) {
        threadinfo *ti = tinfoptr + (i - 1);
        // blocksizeperthread is the index of the last row; it stays -1 when a
        // worker produced nothing, in which case the loop body never runs.
        int l_blocksize = ti->blocksizeperthread;
        for (y = 0; y <= l_blocksize; y++) {
            // write one pixel row to stdout
            write(1, ti->results[y], width*3);
            // BMP lines must be of lengths divisible by 4
            write(1, span, spanBytes);
            free(ti->results[y]);
        }
        free(ti->results);
        ti->results = NULL;
    }

    ttlende = ende.tv_sec * 1000000 + ende.tv_usec;
    ttlstart = start.tv_sec * 1000000 + start.tv_usec;
    fprintf(stderr, "\nDauer: %ld Mikrosekunden\n", (ttlende - ttlstart));

    return 0;
}

这里调用的函数是:

/*
 * Worker thread entry point: computes the image rows from..to (inclusive,
 * 1-based) described by the threadinfo passed in tiptr.
 *
 * On success the row buffers (width*3 bytes each) are stored in
 * mythread->results and mythread->blocksizeperthread is set to the index of
 * the last computed row (row count minus one). The caller owns the buffers.
 * On allocation failure or an empty block nothing is stored, so results and
 * blocksizeperthread keep whatever values the caller initialised them with.
 *
 * Always returns NULL.
 */
void* myDo2(void* tiptr){
    threadinfo* mythread = (threadinfo*)tiptr;
    //copy infos from struct to this thread
    int l_from = mythread->from;
    int l_to = mythread->to;
    int l_width = mythread->width;
    int l_height = mythread->height;

    if (l_width < 1 || l_height < 1) {
        fprintf(stderr, "Thread %d: empty block, nothing to do\n", mythread->threadno);
        return NULL;
    }

    // 3 bytes per pixel
    size_t rowBytes = (size_t)l_width * 3;

    char **container = malloc((size_t)l_height * sizeof *container);
    if (container == NULL) {
        fprintf(stderr, "Thread %d: out of memory\n", mythread->threadno);
        return NULL;
    }
    for (int i = 0; i < l_height; i++) {
        container[i] = malloc(rowBytes);
        if (container[i] == NULL) {
            // release the rows allocated so far before giving up
            while (i > 0) {
                free(container[--i]);
            }
            free(container);
            fprintf(stderr, "Thread %d: out of memory\n", mythread->threadno);
            return NULL;
        }
    }

    int x,y;
    // NOTE(review): iterate is a plain char; this assumes colorLimit fits into
    // the range of char - confirm against its definition.
    char iterate=0;
    Complex c = {0,0};
    Complex newz = {0,0};
    float imageRelation = (float)l_width/(float)height;
    char blueGreenRed[3];
    int counter = 0;

    // counter < l_height keeps the writes inside the allocated rows even if
    // from/to and height were filled in inconsistently.
    for (y=l_from; y <= l_to && counter < l_height; ++y)
    {
        char *row = container[counter];
        for (x=1; x <= l_width; ++x) {
            Complex z = {0,0};
            float quad=0;

            c.re = zoom * (-1.0 + imageRelation * ( (x-1.0) / (width-1.0)) );
            c.im = zoom * ( 0.5 - (y-1.0) / (height-1.0) );

            // iterate
            for ( iterate=1; iterate < colorLimit && quad < quadLimit; ++iterate ) {
                quad = z.re * z.re + z.im * z.im;

                newz.re = (z.re * z.re) - (z.im * z.im) + c.re;
                newz.im = z.re * z.im * 2.0 + c.im;

                z = newz;
            }
            toRGB(iterate, blueGreenRed);
            // Pixel x occupies bytes (x-1)*3 .. (x-1)*3+2 of the row; write it
            // straight into the result row instead of via a stack buffer.
            memcpy(row + (size_t)(x-1)*3, blueGreenRed, 3);
        }
        counter++;
    }

    mythread->blocksizeperthread = counter-1;
    mythread->results = container;
    fprintf(stderr, "Ich bin Thread-Nr. %d\n", mythread->threadno);
    fprintf(stderr, "und habe eine Menge Zeilen von %d\n", mythread->blocksizeperthread);
    fprintf(stderr, "und habe berechnet von %d\n", l_from);
    fprintf(stderr, "und habe berechnet bis %d\n", l_to);
    return NULL;
}

非常感谢,你的错误

最佳答案

简而言之,答案是该模型可以工作,但您需要为每个线程分配足够的工作,使其值得吸收启动、停止和同步线程的开销。并且您必须在能够同时运行多个线程(多核机器)的计算机上运行。

我采用了您提供的应用程序并对其进行了修改以实际编译。如果我在有许多可用 CPU 内核的 Linux 机器上运行它并且给 myDo2 工作线程足够的工作,那么我会看到类似于以下的结果:

./test width height num_threads
./test 10000 10000 1
Dauer: 17,660,185 Mikrosekunden

./test 10000 10000 2
Dauer: 7,864,508 Mikrosekunden

./test 10000 10000 8
Dauer: 1,100,126 Mikrosekunden

这意味着使用 8 个线程时,总挂钟时间从 17.6 秒减少到 1.1 秒,这是一个超过 8 倍的改进(可能是由于更好的内存和缓存使用)。

但是,如果我给每个线程的工作太少,那么我的时间似乎并没有改善,实际上在某些时候会变得更糟。

./test 10 10 1
Dauer: 70 Mikrosekunden

./test 10 10 2
Dauer: 60 Mikrosekunden

./test 10 10 4
Dauer: 205 Mikrosekunden

在这里您可以看到启动线程、然后停止线程并与该线程同步的开销大于线程内部完成的工作量。

所以编程模型可以工作,但您需要正确使用它。

我在 RedHat 上使用以下命令编译了下面的代码:

gcc -std=gnu99 test.c -o test -l pthread

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <pthread.h>
#include <string.h>

/*
 * Work description for one worker thread, filled in by main and passed to
 * myDo2, which writes its output back into the same struct.
 */
typedef struct _threadinfo
{
// first image row this worker computes (main counts rows starting at 1)
int from;
// last image row this worker computes (inclusive)
int to;
// pixels per row; each row buffer holds width * 3 bytes
int width;
// number of row buffers the worker allocates; main sets it to to - from + 1
int height;
// set to -1 by main; the worker stores (rows computed - 1) here when done
int blocksizeperthread;
// NULL until the worker stores its array of row buffers here
char **results;
// thread number, starting at 1; only used in the log output
int threadno;
} threadinfo;

/* Single-precision complex number used for the Mandelbrot iteration. */
typedef struct _cplx
{
// real part
float re;
// imaginary part
float im;
} Complex;

/*
 * Worker thread entry point: computes the image rows from..to (inclusive,
 * 1-based) described by the threadinfo passed in tiptr.
 *
 * On success the row buffers (width*3 bytes each) are stored in
 * mythread->results and mythread->blocksizeperthread is set to the index of
 * the last computed row (row count minus one). The caller owns the buffers.
 * On allocation failure or an empty block nothing is stored, so results and
 * blocksizeperthread keep whatever values the caller initialised them with.
 *
 * Always returns NULL.
 */
void* myDo2( void *tiptr )
{
    threadinfo *mythread = (threadinfo *)tiptr;
    //copy infos from struct to this thread
    int l_from = mythread->from;
    int l_to = mythread->to;
    int l_width = mythread->width;
    int l_height = mythread->height;

    if (l_width < 1 || l_height < 1)
    {
        fprintf(stderr, "Thread %d: empty block, nothing to do\n", mythread->threadno);
        return NULL;
    }

    // 3 bytes per pixel
    size_t rowBytes = (size_t)l_width * 3;

    char **container = malloc((size_t)l_height * sizeof *container);
    if (container == NULL)
    {
        fprintf(stderr, "Thread %d: out of memory\n", mythread->threadno);
        return NULL;
    }
    for (int i = 0; i < l_height; i++)
    {
        container[i] = malloc(rowBytes);
        if (container[i] == NULL)
        {
            // release the rows allocated so far before giving up
            while (i > 0)
            {
                free(container[--i]);
            }
            free(container);
            fprintf(stderr, "Thread %d: out of memory\n", mythread->threadno);
            return NULL;
        }
    }

    int x, y;
    char iterate = 0;
    Complex c = { 0, 0 };
    Complex newz = { 0, 0 };
    // NOTE(review): the scaling below uses l_height, which main sets to the
    // number of rows in this chunk, not the image height. The rendered region
    // (and the work per pixel) therefore depends on the thread count. The
    // struct carries no image height, so this is left as in the original.
    float imageRelation = (float)l_width / (float)l_height;
    int counter = 0;
    float zoom = 1.0;
    float colorLimit = 10.0;
    float quadLimit = 10.0;

    // counter < l_height keeps the writes inside the allocated rows even if
    // from/to and height were filled in inconsistently.
    for (y = l_from; y <= l_to && counter < l_height; ++y)
    {
        char *row = container[counter];
        for (x = 1; x <= l_width; ++x)
        {
            Complex z = { 0, 0 };
            float quad = 0;

            c.re = zoom * (-1.0 + imageRelation * ((x - 1.0) / (l_width - 1.0)));
            c.im = zoom * (0.5 - (y - 1.0) / (l_height - 1.0));

            // iterate
            for (iterate = 1; iterate < colorLimit && quad < quadLimit; ++iterate)
            {
                quad = z.re * z.re + z.im * z.im;

                newz.re = (z.re * z.re) - (z.im * z.im) + c.re;
                newz.im = z.re * z.im * 2.0 + c.im;

                z = newz;
            }
            // The original copied an uninitialised colour buffer into the row
            // (the toRGB call is commented out). Derive a grey value from the
            // iteration count instead, so that the pixel bytes are defined and
            // the iteration result is actually used.
            unsigned char shade = (unsigned char)(255.0f * iterate / colorLimit);
            unsigned char blueGreenRed[3] = { shade, shade, shade };
            // Pixel x occupies bytes (x-1)*3 .. (x-1)*3+2 of the row.
            memcpy(row + (size_t)(x - 1) * 3, blueGreenRed, 3);
        }
        counter++;
    }

    mythread->blocksizeperthread = counter - 1;
    mythread->results = container;
    fprintf(stderr, "Ich bin Thread-Nr. %d\n", mythread->threadno);
    fprintf(stderr, "und habe eine Menge Zeilen von %d\n", mythread->blocksizeperthread);
    fprintf(stderr, "und habe berechnet von %d\n", l_from);
    fprintf(stderr, "und habe berechnet bis %d\n", l_to);
    return NULL;
}

/*
 * Benchmark driver: splits `height` image rows into one contiguous chunk per
 * worker thread, runs myDo2 on every chunk and prints the wall-clock time
 * spent creating, running and joining the workers to stderr.
 *
 *   argv[1]  image width in pixels
 *   argv[2]  image height in pixels
 *   argv[3]  number of worker threads
 *
 * Returns 0 on success, 1 on invalid arguments, allocation failure or when a
 * thread cannot be created, 2 when a thread cannot be joined.
 */
int main(int argc, char **argv, char **envp)
{
    (void)envp;

    if (argc != 4)
    {
        printf("Bitte genau 3 Argumente eingeben.\n");
        return 1;
    }
    // Timestamps for the stopwatch
    struct timeval start, ende;

    int width = atoi(argv[1]);
    int height = atoi(argv[2]);
    int threads = atoi(argv[3]);

    // atoi yields 0 for non-numeric input; a zero thread count (or zero rows
    // per thread) would divide by zero below.
    if (width < 1 || height < 1 || threads < 1)
    {
        fprintf(stderr, "Breite, Hoehe und Threadanzahl muessen groesser als 0 sein.\n");
        return 1;
    }
    // Never start more workers than there are rows to hand out.
    if (threads > height)
    {
        threads = height;
    }

    //create chunks: exactly one per thread; the last chunk takes the remainder
    int i;
    int rounds = threads;
    int blocksize = height / rounds;
    int reminder = height % rounds;
    int begin = 1;

    //init structs
    threadinfo *tinfoptr = malloc(sizeof *tinfoptr * (size_t)rounds);
    if (tinfoptr == NULL)
    {
        fprintf(stderr, "Error allocating thread info\n");
        return 1;
    }
    for (i = 1; i <= rounds; ++i)
    {
        threadinfo *ti = tinfoptr + (i - 1);
        int res = blocksize * i;
        if (i == rounds)
        {
            res = res + reminder;
        }

        //update parameters of tinfo
        ti->from = begin;
        ti->to = res;
        ti->width = width;
        ti->height = res - begin + 1;
        ti->results = NULL;
        ti->threadno = i;
        ti->blocksizeperthread = -1;
        // The next chunk starts one row after this one ends, so that no row
        // is computed twice.
        begin = res + 1;
    }

    fprintf(stderr, "inti abgeschlossen, starte threads\n");

    pthread_t myThread[rounds];

    // Start the stopwatch before the first worker exists: the workers begin
    // computing as soon as they are created, so starting it only after the
    // creation loop would leave part of the work unmeasured.
    gettimeofday(&start, NULL);
    for (i = 1; i <= rounds; ++i)
    {
        fprintf(stderr, "Rufe Thread %d auf\n", i);
        if (pthread_create(&myThread[i - 1], NULL, myDo2,
                           (void *)(tinfoptr + (i - 1))))
        {
            fprintf(stderr, "Error creating thread\n");
            return 1;
        }
    }

    for (i = 1; i <= rounds; ++i)
    {
        /* wait for worker i to finish */
        if (pthread_join(myThread[i - 1], NULL))
        {
            fprintf(stderr, "Error joining thread\n");
            return 2;
        }
    }
    // Stop the stopwatch
    gettimeofday(&ende, NULL);

    // Subtract first, then scale: keeps the intermediate values small.
    long elapsed = (long)(ende.tv_sec - start.tv_sec) * 1000000L
                 + (long)(ende.tv_usec - start.tv_usec);
    fprintf(stderr, "\nDauer: %ld Mikrosekunden\n", elapsed);

    // Release the row buffers handed back by the workers.
    for (i = 0; i < rounds; ++i)
    {
        char **rows = tinfoptr[i].results;
        if (rows != NULL)
        {
            for (int r = 0; r < tinfoptr[i].height; ++r)
            {
                free(rows[r]);
            }
            free(rows);
        }
    }
    free(tinfoptr);

    return 0;
}

关于c - 多线程对速度没有改进 - 在 C 中使用 pthread - 为什么?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/44007554/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com