gpt4 book ai didi

multithreading - OS X GCD 多线程并发使用更多 CPU 但执行速度比单线程慢

转载 作者:行者123 更新时间:2023-11-28 07:17:46 26 4
gpt4 key购买 nike

我有一个方法可以进行一系列计算,这些计算需要相当多的时间才能完成。此方法对其进行计算的对象是在运行时生成的,范围从几个到几千个不等。显然,如果我可以同时跨多个线程运行这些计算会更好,但是当我尝试这样做时,我的程序使用了更多的 CPU,但比一个一个地运行它们花费的时间更长。有什么想法吗?

let itemsPerThread = (dataArray.count / 4) + 1

for var i = 0; i < dataArray.count; i += itemsPerThread
{

let name = "ComputationQueue\(i)".bridgeToObjectiveC().cString()
let compQueue = dispatch_queue_create(name, DISPATCH_QUEUE_CONCURRENT)
dispatch_async(compQueue,
{
let itemCount = i + itemsPerThread < dataArray.count ? itemsPerThread : dataArray.count - i - 1

let subArray = dataArray.bridgeToObjectiveC().subarrayWithRange(NSMakeRange(i, dataCount)) as MyItem[]
self.reallyLongComputation(subArray, increment: increment, outputIndex: self.runningThreads-1)
})

NSThread.sleepForTimeInterval(1)
}

或者:如果我运行同样的东西,但是单个 dispatch_async 调用并且在整个 dataArray 而不是子数组上,它完成得更快,同时使用更少的 CPU。

最佳答案

你(这是我的猜测)想要做的应该看起来像

//
// main.swift
// test
//
// Created by user3441734 on 12/11/15.
// Copyright © 2015 user3441734. All rights reserved.
//

import Foundation

let computationGroup = dispatch_group_create()

var arr: Array<Int> = []
for i in 0..<48 {
arr.append(i)
}
print("arr \(arr)")

func job(inout arr: Array<Int>, workers: Int) {
let count = arr.count
let chunk = count / workers
guard chunk * workers == count else {
print("array.cout divided by workers must by integer !!!")
return
}
let compQueue = dispatch_queue_create("test", DISPATCH_QUEUE_CONCURRENT)
let syncQueue = dispatch_queue_create("aupdate", DISPATCH_QUEUE_SERIAL)

for var i = 0; i < count; i += chunk
{
let j = i
var tarr = arr[j..<j+chunk]
dispatch_group_enter(computationGroup)
dispatch_async(compQueue) { () -> Void in
for k in j..<j+chunk {
// long time computation
var z = 100000000
repeat {
z--
} while z > 0
// update with chunk
tarr[k] = j
}
dispatch_async(syncQueue, { () -> Void in
for k in j..<j+chunk {
arr[k] = tarr[k]
}
dispatch_group_leave(computationGroup)
})

}
}
dispatch_group_wait(computationGroup, DISPATCH_TIME_FOREVER)
}
var stamp: Double {
return NSDate.timeIntervalSinceReferenceDate()
}

print("running on dual core ...\n")
var start = stamp
job(&arr, workers: 1)
print("job done by 1 worker in \(stamp-start) seconds")
print("arr \(arr)\n")

start = stamp
job(&arr, workers: 2)
print("job done by 2 workers in \(stamp-start) seconds")
print("arr \(arr)\n")

start = stamp
job(&arr, workers: 4)
print("job done by 4 workers in \(stamp-start) seconds")
print("arr \(arr)\n")

start = stamp
job(&arr, workers: 6)
print("job done by 6 workers in \(stamp-start) seconds")
print("arr \(arr)\n")

有结果

arr [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]
running on dual core ...

job done by 1 worker in 5.16312199831009 seconds
arr [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

job done by 2 workers in 2.49235796928406 seconds
arr [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24]

job done by 4 workers in 3.18479603528976 seconds
arr [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36]

job done by 6 workers in 2.51704299449921 seconds
arr [0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24, 24, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 40, 40, 40]

Program ended with exit code: 0

...您可以使用下一个模式在任意数量的 worker 之间分配工作(为您提供最佳性能的 worker 数量取决于 worker 定义和您环境中可用的资源)。通常对于任何类型的长时间计算(转换),您都可以获得一些性能提升。在两个核心环境中最高可达 50%。如果您的工作人员使用“默认”使用更多内核的高度优化函数,则性能增益几乎为零:-)

// generic implementation
// 1) job distribute data between workers as fair, as possible
// 2) workers do their task in parallel
// 3) the order in resulting array reflect the input array
// 4) there is no requiremets of worker block, to return
// the same type as result of yor 'calculation'

func job<T,U>(arr: [T], workers: Int, worker: T->U)->[U] {

guard workers > 0 else { return [U]() }

var res: Dictionary<Int,[U]> = [:]

let workersQueue = dispatch_queue_create("workers", DISPATCH_QUEUE_CONCURRENT)
let syncQueue = dispatch_queue_create("sync", DISPATCH_QUEUE_SERIAL)
let group = dispatch_group_create()

var j = min(workers, arr.count)
var i = (0, 0, arr.count)
var chunk: ArraySlice<T> = []

repeat {
let a = (i.1, i.1 + i.2 / j, i.2 - i.2 / j)
i = a
chunk = arr[i.0..<i.1]
dispatch_group_async(group, workersQueue) { [i, chunk] in
let arrs = chunk.map{ worker($0) }
dispatch_sync(syncQueue) {[i,arrs] in
res[i.0] = arrs
}
}
j--
} while j != 0

dispatch_group_wait(group, DISPATCH_TIME_FOREVER)
let idx = res.keys.sort()
var results = [U]()
idx.forEach { (idx) -> () in
results.appendContentsOf(res[idx]!)
}
return results
}

关于multithreading - OS X GCD 多线程并发使用更多 CPU 但执行速度比单线程慢,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/24706759/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com