gpt4 book ai didi

c - 使用 WinAPI 的多线程。计算圆周率

转载 作者:太空宇宙 更新时间:2023-11-04 04:25:32 28 4
gpt4 key购买 nike

我的作业需要使用多线程计算圆周率。问题是——当使用多个线程时,大多数时候总和计算不正确(小于应有的值)。我花了很多时间来弄清楚,所以也许我忽略了一些明显的东西。希望得到您的帮助。

附言:已经有一个关于同一任务的问题,但我遇到的错误与之不同。

这是我的代码。

#include <stdio.h>
#include <Windows.h>
#include <ctime>

// Total number of threads that do work, counting the main thread:
// main() creates THREAD_COUNT - 1 worker threads and also calculates itself.
#define THREAD_COUNT 2
// Number of indices summed over; calculate() samples x = (i + 0.5) / N for i < N,
// and main() prints the total sum divided by N.
#define N 10000000
// Number of consecutive indices handled by a single calculate() call.
#define BLOCK_SIZE 43087

typedef struct {
unsigned startPos;
int threadInd;
double threadSum;
bool isCalculating;
} ThreadArg;

HANDLE* threads;
HANDLE* events;

DWORD WINAPI ThreadProc(LPVOID);
void calculate(double* sum, unsigned start);

int main() {
unsigned cursor = 0;
ThreadArg* params = NULL;
threads = NULL;

if (THREAD_COUNT > 1) {
params = (ThreadArg*)malloc(sizeof(ThreadArg)*(THREAD_COUNT - 1));
threads = (HANDLE*)malloc(sizeof(HANDLE)*(THREAD_COUNT - 1));
events = (HANDLE*)malloc(sizeof(HANDLE)*(THREAD_COUNT - 1));
}
for (int i = 0; i < THREAD_COUNT - 1; ++i) {
ThreadArg arg;
arg.isCalculating = true;
arg.startPos = cursor;
arg.threadInd = i;
arg.threadSum = 0;
params[i] = arg;
threads[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)ThreadProc, (LPVOID)(&params[i]), CREATE_SUSPENDED, 0);
events[i] = CreateEvent(NULL, TRUE, TRUE, NULL);
cursor += BLOCK_SIZE;
}

unsigned startTime = clock();
for (int i = 0; i < THREAD_COUNT - 1; ++i) {
ResumeThread(threads[i]);
}

double mainSum = 0;
while (cursor < N) {
for (int i = 0; i < THREAD_COUNT - 1; ++i) {
if (!params[i].isCalculating) {
params[i].startPos = cursor;
SetEvent(events[i]);
cursor += BLOCK_SIZE;
}
}
calculate(&mainSum, cursor);
cursor += BLOCK_SIZE;
}

bool allFinished;
do {
allFinished = true;
for (int i = 0; i < THREAD_COUNT - 1; ++i) {
if (params[i].isCalculating) {
allFinished = false;
break;
}
}
} while(!allFinished);

for (int i = 0; i < THREAD_COUNT - 1; ++i) {
mainSum += params[i].threadSum;
CloseHandle(threads[i]);
}

printf("Time: %d\n", clock() - startTime);
printf("PI = %.7f\n", mainSum/N);
free(threads);
free(params);
system("pause");
}

DWORD WINAPI ThreadProc(LPVOID lParam) {
ThreadArg* param = (ThreadArg*)lParam;
while (true) {
ResetEvent(events[param->threadInd]);
calculate(&param->threadSum, param->startPos);
InterlockedDecrement((LONG*)&param->isCalculating);
WaitForSingleObject(events[param->threadInd], INFINITE);
InterlockedIncrement((LONG*)&param->isCalculating);
}
}

// Adds to *sum the value 4 / (1 + x*x) for x = (i + 0.5) / N, for every index i
// in [start, start + BLOCK_SIZE) that is also below N.
//   sum   - accumulator that is updated in place
//   start - first index of the block
void calculate(double* sum, unsigned start) {
    const unsigned total = N;

    // Clamp the block's upper bound once instead of testing i < N per iteration.
    unsigned stop = start + BLOCK_SIZE;
    if (stop > total) {
        stop = total;
    }

    for (unsigned k = start; k < stop; ++k) {
        const double mid = (k + 0.5) / N;
        *sum += 4 / (1 + mid * mid);
    }
}

最佳答案

你根本不需要事件。工作线程永远不必等待。所有这些都可以通过互锁(Interlocked)操作来完成。

// Shared task object for a multi-threaded midpoint-rule sum of
// 4 / (1 + x*x) with x = (i - 0.5) * step, i = 1 .. maxPos, step = 1 / maxPos.
//
// Every thread runs ThreadProc() on the same CALC_TASK. Threads claim blocks
// of indices with a compare-exchange on _startPos and add their partial
// results to _sum with a 64-bit compare-exchange, so no thread ever waits.
class CALC_TASK
{
    // add() swaps the double accumulator through its 64-bit integer view, so
    // both union members must have the same size.
    C_ASSERT(sizeof(double) == sizeof(__int64));

    union {
        double _sum;      // accumulated result, read as a number
        __int64 _value;   // the same bits, as operand of the 64-bit compare-exchange
    };

    double _step;      // 1 / maxPos
    LONG _startPos;    // number of indices already claimed; next block starts after it
    LONG _maxPos;      // total number of indices
    LONG _blockSize;   // maximum number of indices claimed per getBlock() call (>= 1)

    // Sums 4 / (1 + x*x) for the `count` indices i, i-1, ..., i-count+1 and
    // returns the sum multiplied by step. Requires count >= 1 (the loop body
    // runs before count is tested).
    static double calculate(LONG i, LONG count, double step)
    {
        double x, sum = 0.0;

        do
        {
            x = (i-- - 0.5) * step;
            sum += 4.0 / (1.0 + x * x);
        } while (--count);

        return step * sum;
    }

    // Atomically performs _sum += x using a compare-exchange retry loop.
    void add(double x)
    {
        // `value` is the bit pattern of `sum`: the accumulator content this
        // attempt is based on.
        union {
            double sum;
            __int64 value;
        };

        // `new_value` is the bit pattern of `new_sum`: first the proposed new
        // content, then (after the exchange) the content actually found.
        union {
            double new_sum;
            __int64 new_value;
        };

        // Plain read; if it is stale the compare-exchange below fails and the
        // loop retries with the value that was actually found.
        sum = _sum;

        for ( ; ; value = new_value)
        {
            new_sum = sum + x;

            new_value = _InterlockedCompareExchange64(&_value, new_value, value);

            if (new_value == value)
            {
                // The accumulator still held `value`, so the new sum was stored.
                return;
            }
        }
    }

    // Claims the next unprocessed block of at most _blockSize indices.
    // On success sets pos to the last index of the block and count to the
    // number of indices in it (always >= 1) and returns TRUE; returns FALSE
    // once all indices have been claimed.
    BOOL getBlock(LONG& pos, LONG& count)
    {
        LONG startPos, newPos, curPos;

        for (startPos = _startPos; startPos < _maxPos; startPos = curPos)
        {
            // Compare the remaining distance with the block size instead of
            // computing startPos + _blockSize first: that sum overflows LONG
            // when _maxPos is within _blockSize of LONG_MAX.
            if (_maxPos - startPos > _blockSize)
            {
                newPos = startPos + _blockSize;
            }
            else
            {
                newPos = _maxPos;
            }

            curPos = _InterlockedCompareExchange(&_startPos, newPos, startPos);

            if (curPos == startPos)
            {
                // No other thread moved _startPos in between: the block is ours.
                pos = newPos, count = newPos - startPos;
                return TRUE;
            }
            // Another thread claimed first; retry from the position it left.
        }

        return FALSE;
    }

    // Claims and processes blocks until none are left.
    void calculateEx()
    {
        LONG i, count;

        double step = _step;

        while (getBlock(i, count))
        {
            add(calculate(i, count, step));
        }
    }

public:

    // Returns the accumulated sum. The read is not synchronized with add().
    double getsum()
    {
        return _sum;
    }

    // maxPos    - total number of indices to sum over
    // blockSize - number of indices a thread claims at a time; values below 1
    //             are treated as 1, because an empty or negative block would
    //             never advance _startPos and calculate() requires count >= 1
    CALC_TASK(LONG maxPos, LONG blockSize)
    {
        _startPos = 0;
        _blockSize = blockSize > 0 ? blockSize : 1;
        _maxPos = maxPos;
        _sum = 0.0;
        _step = 1.0 / (double)maxPos;
    }

    // Thread entry point; pct must point to the CALC_TASK to work on.
    static DWORD CALLBACK ThreadProc(PVOID pct)
    {
        static_cast<CALC_TASK*>(pct)->calculateEx();
        return 0;
    }
};

// Runs one CALC_TASK(maxPos, blockSize) on the calling thread plus one extra
// thread for every additional processor reported by GetNativeSystemInfo, waits
// for the extra threads to finish, and returns the task's accumulated sum.
double test(LONG maxPos, LONG blockSize)
{
    SYSTEM_INFO sysInfo;
    GetNativeSystemInfo(&sysInfo);

    HANDLE* workers = 0;
    ULONG started = 0;   // number of threads that were actually created

    CALC_TASK task(maxPos, blockSize);

    if (sysInfo.dwNumberOfProcessors > 1)
    {
        const ULONG wanted = sysInfo.dwNumberOfProcessors - 1;

        workers = (HANDLE*)alloca(wanted * sizeof(HANDLE));

        for (ULONG attempt = 0; attempt < wanted; ++attempt)
        {
            // A failed CreateThread is skipped; the task still completes with
            // fewer threads.
            HANDLE created = CreateThread(NULL, PAGE_SIZE, CALC_TASK::ThreadProc, &task, 0, 0);
            if (created)
            {
                workers[started++] = created;
            }
        }
    }

    // The calling thread takes part in the work as well.
    CALC_TASK::ThreadProc(&task);

    if (started)
    {
        WaitForMultipleObjects(started, workers, TRUE, INFINITE);

        // Handles are closed last-created first.
        while (started)
        {
            CloseHandle(workers[--started]);
        }
    }

    return task.getsum();
}

void testEx()
{
ULONG dwStart = GetTickCount();

double d = test(0x10000000, 0x8000);

dwStart = GetTickCount() - dwStart;

DbgPrint("pi = %.15f, %u milliseconds\n", d, dwStart);
}

在我使用 8 个核心/线程进行的测试中,0x10000000 步大约需要 0.5 秒;而使用单线程则需要大约 3.5 秒。

作为替代方案,您可以使用 openMP

// Approximates pi as the midpoint-rule sum of 4 / (1 + x*x) over num_steps
// equal sub-intervals of [0, 1], with the loop parallelised by OpenMP when the
// compiler has OpenMP enabled.
//   num_steps - number of sub-intervals
// Returns the approximation, or 0.0 when num_steps <= 0 (without this guard
// num_steps == 0 would evaluate inf * 0 and return NaN).
double calculateOMP(int num_steps)
{
    if (num_steps <= 0)
    {
        return 0.0;
    }

    const double step = 1.0 / (double)num_steps;
    double sum = 0.0;

    // x is declared inside the loop body, so every iteration has its own copy
    // and no private() clause is required.
#pragma omp parallel for reduction(+:sum)
    for (int i = num_steps; i > 0; --i)
    {
        const double x = (i - 0.5) * step;
        sum += 4.0 / (1.0 + x * x);
    }

    return step * sum;
}

为此您需要使用/openmp 选项并将vcomp.lib 添加到链接器输入

关于c - 使用 WinAPI 的多线程。计算圆周率,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/42017346/

28 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com