c++ - 使用 Boost::Fiber 的多个共享工作池-6ren

c++ - 使用 Boost::Fiber 的多个共享工作池

转载作者：行者123 更新时间：2023-11-30 04:59:47

我一直在研究 boost::fibers 作为处理我的一些数据处理和 IO 问题的方法。 shared_work 调度程序看起来特别有前途，因为它可以让我为每个数据处理源启动一个数据处理任务，然后让它们根据需要在几个线程中相互分配。

然而，这让我想到了问题的根源：看起来每个进程只能有一个 shared_work “池”。如果我想让一组 12 个负责处理数据的纤程在 4 个线程之间共享，同时让另一组 12 个负责把处理后的数据写入文件的纤程在另外 4 个线程之间共享，我该怎么办？

类似于:

#include<string>
#include<iostream>
#include<vector>
#include<mutex>
#include<thread>
#include<random>
#include<map>
#include<sstream>
#include<boost/bind.hpp>
#include<boost/fiber/all.hpp>

// Short aliases for the fiber handle and for the lock type used with fiber mutexes.
using FiberType = boost::fibers::fiber;
using LockType  = std::unique_lock<boost::fibers::mutex>;


// Workload shape: iterations per fiber, fibers per type, threads per type, and the
// bounds of the random argument that each iteration hands to findPrimeNumber().
static constexpr int fiberIterationCount = 5000;
static constexpr int fiberCount          = 12;
static constexpr int threadCount         = 4;
static constexpr int distLowerLimit      = 50;
static constexpr int distUpperLimit      = 500;

// Per-type completion tracking: each fiber bumps its type's counter under the matching
// mutex and then signals the matching condition variable.
static boost::fibers::mutex firstMutex;
static boost::fibers::mutex secondMutex;
static boost::fibers::condition_variable firstCondition;
static boost::fibers::condition_variable secondCondition;
// Rendezvous for every worker thread of both types (hence 2*threadCount participants).
static boost::fibers::barrier synchronize(2*threadCount);
static int typeOneFibersFinished = 0;
static int typeTwoFibersFinished = 0;

// One random engine per fiber, indexed by fiber number.
static std::mt19937 typeOneGenerators[fiberCount];
static std::mt19937 typeTwoGenerators[fiberCount];

// Taken by each worker thread while it registers its label in threadTypeMap.
static std::mutex typeMapMutex;
// Maps an OS thread id to the human-readable label printed by the fibers.
static std::map<std::thread::id, std::string> threadTypeMap;


// CPU-bound busy work of variable duration: returns the n-th prime (1-based) found by
// trial division. For n <= 0 no prime is searched for and 1 is returned.
unsigned long long findPrimeNumber(int n)
{
    unsigned long long candidate = 1;
    for(int found = 0; found < n; )
    {
        ++candidate;
        // advance the divisor until it either divides the candidate or exceeds its square root
        unsigned long long divisor = 2;
        while(((divisor * divisor) <= candidate) && ((candidate % divisor) != 0))
        {
            ++divisor;
        }
        // no divisor up to sqrt(candidate) matched, so the candidate is prime
        if((divisor * divisor) > candidate)
        {
            ++found;
        }
    }
    return candidate;
}

void fiberTypeOne(int fiberNumber)
{
    std::cout<<"Starting Type One Fiber #"<<fiberNumber;
    std::uniform_int_distribution<int> dist(distLowerLimit, distUpperLimit);
    for(int i=0; i<fiberIterationCount; ++i)
    {
        //generate a randomish load on this fiber so that it does not take a regular time slice
        int tempPrime = dist(typeOneGenerators[fiberNumber]);
        unsigned long long temp = findPrimeNumber(tempPrime);
        std::cout << "T1 fiber #"<<fiberNumber<<" running on "<<threadTypeMap[std::this_thread::get_id()]
                  <<"\n    Generated: "<<tempPrime<<", "<<temp;
        boost::this_fiber::yield();
    }

    {
        LockType lock(firstMutex);
        ++typeOneFibersFinished;
    }
    firstCondition.notify_all();
}

void threadTypeOne(int threadNumber)
{
    //make a shared work scheduler that associates its fibers with "fiber pool 0"
    boost::fibers::use_scheduling_algorithm< multi_pool_scheduler<0> >();
    std::cout<<"Starting Type One Thread #"<<threadNumber<<" With Thread ID: "<<std::this_thread::get_id();

    {
        std::unique_lock<std::mutex> lock{typeMapMutex};
        std::ostringstream gen;
        gen<<"Thread Type 1 - Number: "<<threadNumber<<" with id: "<<std::this_thread::get_id();
        threadTypeMap[std::this_thread::get_id()] = gen.str();
    }
    if(threadNumber == 0)
    { //if we are thread zero, create the fibers then join them to take ourselves off the "fiber list"
        std::cout<<"Spawning Type One Fibers";
        for(int fiberNumber=0; fiberNumber<fiberCount; ++fiberNumber)
        {//create the fibers and instantly detach them
            FiberType(boost::bind(&fiberTypeOne, fiberNumber)).detach();
        }
    }
    synchronize.wait();
    std::cout<<"T1 Thread preparing to wait";
    //now let the fibers do their thing
    LockType lock(firstMutex);
    firstCondition.wait(lock, [](){return (typeOneFibersFinished == fiberCount);});
}

void fiberTypeTwo(int fiberNumber)
{
    std::cout<<"Starting Type Two Fiber #"<<fiberNumber;
    std::uniform_int_distribution<int> dist(distLowerLimit, distUpperLimit);
    for(int i=0; i<fiberIterationCount; ++i)
    {
        //generate a randomish load on this fiber so that it does not take a regular time slice
        int tempPrime = dist(typeTwoGenerators[fiberNumber]);
        unsigned long long temp = findPrimeNumber(tempPrime);
        std::cout << "T2 fiber #"<<fiberNumber<<" running on "<<threadTypeMap[std::this_thread::get_id()]
                  <<"\n    Generated: "<<tempPrime<<", "<<temp;
        boost::this_fiber::yield();
    }

    {
        LockType lock(secondMutex);
        ++typeTwoFibersFinished;
    }
    secondCondition.notify_all();
}

void threadTypeTwo(int threadNumber)
{
    //make a shared work scheduler that associates its fibers with "fiber pool 1"
    boost::fibers::use_scheduling_algorithm< multi_pool_scheduler<1> >();
    std::cout<<"Starting Type Two Thread #"<<threadNumber<<" With Thread ID: "<<std::this_thread::get_id();
    {
        std::unique_lock<std::mutex> lock{typeMapMutex};
        std::ostringstream gen;
        gen<<"Thread Type 2 - Number: "<<threadNumber<<" with id: "<<std::this_thread::get_id();
        threadTypeMap[std::this_thread::get_id()] = gen.str();
    }
    if(threadNumber == 0)
    { //if we are thread zero, create the fibers then join them to take ourselves off the "fiber list"
        std::cout<<"Spawning Type Two Fibers";
        for(int fiberNumber=0; fiberNumber<fiberCount; ++fiberNumber)
        {//create the fibers and instantly detach them
            FiberType(boost::bind(&fiberTypeTwo, fiberNumber)).detach();
        }
    }
    synchronize.wait();
    std::cout<<"T2 Thread preparing to wait";
    //now let the fibers do their thing
    LockType lock(secondMutex);
    secondCondition.wait(lock, [](){return (typeTwoFibersFinished == fiberCount);});
}

int main(int argc, char* argv[])
{
    std::cout<<"Initializing Random Number Generators";
    for(unsigned i=0; i<fiberCount; ++i)
    {
        typeOneGenerators->seed(i*500U - 1U);
        typeTwoGenerators->seed(i*1500U - 1U);
    }

    std::cout<<"Commencing Main Thread Startup Startup";
    std::vector<std::thread> typeOneThreads;
    std::vector<std::thread> typeTwoThreads;
    for(int i=0; i<threadCount; ++i)
    {
        typeOneThreads.emplace_back(std::thread(boost::bind(&threadTypeOne, i)));
        typeTwoThreads.emplace_back(std::thread(boost::bind(&threadTypeTwo, i)));
    }
    //now let the threads do their thing and wait for them to finish with join
    for(unsigned i=0; i<threadCount; ++i)
    {
        typeOneThreads[i].join();
    }
    for(unsigned i=0; i<threadCount; ++i)
    {
        typeTwoThreads[i].join();
    }
    std::cout<<"Shutting Down";
    return 0;
}

如果不自己编写纤程调度器，这是否可行？如果可行，该怎么做？

最佳答案

我确定我确实需要编写自己的调度程序。但是，实际工作量很小。 boost::fibers::shared_work调度程序使用单个静态队列管理线程之间共享的纤程列表，由静态互斥体保护。还有另一个队列管理每个线程的主纤程(因为每个线程都有自己的调度程序)，但它是类实例的本地队列，而不是像静态成员那样在类的所有实例之间共享。

然后，为了防止静态队列和锁在不同的线程集之间共享，解决方案是在类前面放置一个几乎无用的模板参数。然后每个线程将不同的参数传递给这个模板。以这种方式，由于您为模板的每个特化获得不同的对象，因此您为每个具有不同池编号的实例化获得不同的静态变量集。

下面是我对这个解决方案的实现，(主要是 boost::fiber::shared_work 的拷贝，其中包含一些更明确命名的变量和类型，并添加了模板参数)。

#include <condition_variable>
#include <chrono>
#include <deque>
#include <mutex>
#include <boost/config.hpp>
#include <boost/fiber/algo/algorithm.hpp>
#include <boost/fiber/context.hpp>
#include <boost/fiber/detail/config.hpp>
#include <boost/fiber/scheduler.hpp>
#include <boost/assert.hpp>
#include "boost/fiber/type.hpp"

#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_PREFIX
#endif

#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable:4251)
#endif

/*!
* @class SharedWorkPool
* @brief A boost::fibers scheduling algorithm that keeps worker fibers in a queue shared by
* every scheduler instance with the same PoolNumber. Each template specialization has its
* own static queue and mutex, so threads that install SharedWorkPool<0> exchange fibers only
* among themselves, and threads that install SharedWorkPool<1> use a separate pool.
* Pinned contexts are never shared; they stay in a per-instance local queue.
* @tparam PoolNumber Identifies the pool; instances with equal values share one ready queue
*/
template <int PoolNumber>
class SharedWorkPool : public boost::fibers::algo::algorithm
{
    using ReadyQueueType = std::deque<boost::fibers::context*>;
    using LocalQueueType = boost::fibers::scheduler::ready_queue_type;
    using LockType       = std::unique_lock<std::mutex>;

public:
    //! Creates a scheduler whose suspend_until()/notify() do nothing (suspendable == false).
    SharedWorkPool() = default;

    //! @param suspend when true, suspend_until() blocks the thread until notify() or timeout
    SharedWorkPool( bool suspend) : suspendable{suspend}{}

    ~SharedWorkPool() override = default;

    // instances are neither copyable nor movable
    SharedWorkPool( SharedWorkPool const&) = delete;
    SharedWorkPool& operator=(const SharedWorkPool&) = delete;
    SharedWorkPool( SharedWorkPool &&) = delete;
    SharedWorkPool& operator=(SharedWorkPool&&) = delete;

    //! Queues a ready context: pinned contexts locally, all others on the shared queue.
    void awakened(boost::fibers::context* ctx) noexcept override;

    //! Returns the next context to run, or nullptr when both queues are empty.
    boost::fibers::context* pick_next() noexcept override;

    //! True when the shared queue or this instance's local queue holds a context.
    bool has_ready_fibers() const noexcept override
    {
        // held for the whole check, exactly as long as the shared queue is inspected
        LockType guard{readyQueueMutex};
        const bool nothingQueued = readyQueue.empty() && localQueue.empty();
        return !nothingQueued;
    }

    //! Blocks until notify() is called or timePoint is reached; no-op unless suspendable.
    void suspend_until(const std::chrono::steady_clock::time_point& timePoint) noexcept override;

    //! Wakes a thread blocked in suspend_until(); no-op unless suspendable.
    void notify() noexcept override;

private:
    // shared by every instance of this specialization; readyQueueMutex guards readyQueue
    static ReadyQueueType readyQueue;
    static std::mutex     readyQueueMutex;

    // per-instance state
    LocalQueueType          localQueue{};        // pinned contexts handed to awakened()
    std::mutex              instanceMutex{};     // guards waitNotifyFlag
    std::condition_variable suspendCondition{};  // signalled by notify()
    bool                    waitNotifyFlag{false};
    bool                    suspendable{false};

};

/*!
* @brief Enqueues a context that has become ready to run.
* @param ctx the ready context; pinned contexts go to this instance's local queue, every
* other context is detached and appended to the queue shared by the pool
*/
template <int PoolNumber>
void SharedWorkPool<PoolNumber>::awakened(boost::fibers::context* ctx) noexcept
{
    const bool isWorker = !ctx->is_context(boost::fibers::type::pinned_context);
    if(isWorker)
    {
        // worker fiber: detach it first, then publish it on the shared queue
        ctx->detach();
        LockType guard{readyQueueMutex};
        readyQueue.push_back(ctx);
        return;
    }
    // pinned context (the thread's main fiber, per the original author's note): never shared
    localQueue.push_back(*ctx);
}


/*!
* @brief Selects the next context to run on the calling thread.
* @return a context taken from the shared queue (attached via the active context before
* being returned), otherwise the front of the local queue, otherwise nullptr
*/
template <int PoolNumber>
boost::fibers::context* SharedWorkPool<PoolNumber>::pick_next() noexcept
{
    LockType guard{readyQueueMutex};
    if(readyQueue.empty())
    {
        // nothing shared is ready; release the shared lock before touching local state
        guard.unlock();
        if(localQueue.empty())
        {
            return nullptr;
        }
        // fall back to a locally queued context (main or dispatcher fiber, per the original note)
        boost::fibers::context* const pinned = & localQueue.front();
        localQueue.pop_front();
        return pinned;
    }

    // take the oldest shared context and drop the lock before attaching it
    boost::fibers::context* const worker = readyQueue.front();
    readyQueue.pop_front();
    guard.unlock();
    BOOST_ASSERT( worker != nullptr);
    // attach the context to the current scheduler via the active fiber of this thread
    boost::fibers::context::active()->attach( worker);
    return worker;
}

/*!
* @brief Blocks the calling thread until notify() sets the wake-up flag or the deadline passes.
* Does nothing when the instance was not constructed as suspendable.
* @param timePoint deadline; time_point::max() means "wait without a timeout"
*/
template <int PoolNumber>
void SharedWorkPool<PoolNumber>::suspend_until(const std::chrono::steady_clock::time_point& timePoint) noexcept
{
    if(!suspendable)
    {
        return;
    }

    const auto wasNotified = [this](){ return waitNotifyFlag; };
    LockType lock{instanceMutex};
    // (max)() is parenthesised so that a function-like max macro (e.g. from <windows.h>
    // without NOMINMAX) cannot expand here.
    if((std::chrono::steady_clock::time_point::max)() == timePoint)
    {
        suspendCondition.wait(lock, wasNotified);
    }
    else
    {
        suspendCondition.wait_until(lock, timePoint, wasNotified);
    }
    // consume the notification (still under instanceMutex) so the next call blocks again
    waitNotifyFlag = false;
}

/*!
* @brief Sets the wake-up flag and signals the condition variable that suspend_until() waits on.
* Does nothing when the instance was not constructed as suspendable.
*/
template <int PoolNumber>
void SharedWorkPool<PoolNumber>::notify() noexcept
{
    if(!suspendable)
    {
        return;
    }

    {
        // set the flag under the mutex; the lock is released before signalling
        LockType guard{instanceMutex};
        waitNotifyFlag = true;
    }
    suspendCondition.notify_all();
}

// Definitions of the per-specialization static members: every distinct PoolNumber gets its
// own ready queue and its own mutex.
template <int PoolNumber>
std::deque<boost::fibers::context*> SharedWorkPool<PoolNumber>::readyQueue{};

template <int PoolNumber>
std::mutex SharedWorkPool<PoolNumber>::readyQueueMutex{};

// Close the regions opened near the top of this listing: restore the MSVC warning state
// saved by "#pragma warning(push)" and pair BOOST_ABI_PREFIX with BOOST_ABI_SUFFIX.
#ifdef _MSC_VER
# pragma warning(pop)
#endif

#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_SUFFIX
#endif

请注意，如果在不同的编译单元中使用相同的池编号，我并不完全确定会发生什么。不过，在正常情况下，即对每个 WorkPoolNumber 只在一处写 boost::fibers::use_scheduling_algorithm< Threads::Fibers::SharedWorkPool<WorkPoolNumber> >();，它可以完美地工作：分配给某一组线程的纤程始终只在这组线程中运行，绝不会被另一组线程运行。

关于c++ - 使用 Boost::Fiber 的多个共享工作池，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/51051481/

文章推荐： Java Eclipse SVN - 提交

文章推荐： java - 如何使动态加载的插件能够感知 Web 应用程序

文章推荐： c++ - Directx11 加载纹理

azure - 为什么我可以连接到 Synapse 无服务器 SQL 池，但无法连接到同一 Synapse 工作区中的专用 SQL 池
最近，我们将专用 SQL 池部署到生产中的 Synapse 工作区。在开发中，我们可以访问无服务器 SQL 池和专用 SQL 池。但是，在生产中，我们可以访问无服务器 SQL 池，但无法访问专用 SQ
WCF 客户端连接缓存/池
假设您从一个项目公开 WCF 服务，并使用“添加服务引用”(在本例中为 Framework 3.5 WPF 应用程序)在另一个项目中使用它。当您重新实例化 ClientBase 派生代理时，Clie
Python 池生成池
我有一个函数，它使用 multiprocessing.Pool 并行处理一个数据集中的所有数据。 from multiprocessing import Pool ... def func():
带有工作进程的 python 池
我正在尝试使用进程对象在 python 中使用工作池。每个 worker (一个进程)进行一些初始化(花费大量时间)，传递一系列作业(理想情况下使用 map())，并返回一些东西。除此之外，不需要任何
ZFS 列表与 ZFS 池
我是软件工程师，最近我构建了我的 Linux 机器，想探索更多系统管理员类型的任务。我已经探索并阅读了很多关于 ZFS 的内容，但我越来越困惑，因为每篇文章对它的描述都不一样。 Everything
zfs - 如何在池繁忙时销毁 ZFS 池？
我有 zfs 池: $ sudo zpool status lxd pool: lxd state: ONLINE scan: none requested config: NAME
multithreading - 斯卡拉的 Actor 池
我有一个基于 Actor 的项目，对于其中的一部分，我必须使用一些接收消息的 Actor ，然后一个 Actor 分别分配给每个请求，每个 Actor 负责执行其消息请求，所以我需要类似线程的东西我的
solaris - 以可写方式打开 ZFS 池
我已经使用 QEMU 模拟器成功地将 FreeBSD 安装到原始图像文件中。我已经使用 ZFS 文件系统 (ZFS POOL) 格式化了图像文件。使用下面的命令我已经成功地挂载了准备好由 zpool
python - 不断运行的 worker 池
我正在使用 multiprocessor.Pool并行处理一些文件。该代码等待接收文件，然后使用 Pool.apply_async 将该文件发送给工作人员。，然后处理文件。这段代码应该一直在运行，
scala - 具有关闭连接的 BoneCp 池
我正在使用带有光滑的 Bonecp 数据源。并发现池包含关闭的连接所以我总是遇到这个异常 java.sql.SQLException: Connection is closed! at com
ruby-on-rails - 如何在Sidekiq中切换Redis主机/池？
我有apartment gem的 Multi-Tenancy Rails应用程序，我可以使用apartment-sidekiq在每个工作程序中成功切换数据库租户。但是，sidekiq worker 正
ZFS 文件系统与 ZFS 池
ZFS 池可能由数据集(文件系统、快照等)或卷组成。 ZFS 卷就像 block 设备，但我不明白池和文件系统之间的区别。当我通过 zpool create pool1 sda sdb sdc 创建
docker - 以编程方式创建 Airflow 池
我在 docker 容器上运行了 airflow。我正在使用 airflow 2.0.2 版。我知道我实际上可以通过 UI 创建池。但我正在寻找一种通过 pools.json 文件在 docker
java - 连接未返回 tomcat 池
我在tomcat中有一个jdbc池，用于建立数据库连接。我在使用后没有显式关闭连接对象。我的“maxActive”参数设置为100。应用程序运行了一段时间，但随后失败进行数据库查询。它会等待无限时间来
java - PostgreSQL JDBC 池
阅读 PostgreSQL 文档 here我读了以下内容: As well, connections requested for users other than the default config
docker - 以编程方式创建 Airflow 池
我在 docker 容器上运行了 airflow。我正在使用 airflow 2.0.2 版。我知道我实际上可以通过 UI 创建池。但我正在寻找一种通过 pools.json 文件在 docker
java - 如何保持固定大小的 ListenableFuture 池？
我正在读取一个大的 URL 文件并向服务发出请求。该请求由返回 ListenableFuture 的客户端执行。现在我想保留一个 ListenableFuture 池，例如最多同时执行 N 个 Fut
python - 池、队列、悬挂
我想使用队列来保存结果，因为我希望消费者(串行而不是并行)在工作人员产生结果时处理工作人员的结果。现在，我想知道为什么以下程序挂起。 import multiprocessing as mp imp
javascript - JQuery Ajax 池
我正在开发一个单页应用程序，目前正在构建一个 JQuery、ajax 函数，以便我的所有调用都能通过。对于一个典型的页面，我可能有 3 个 ajax 调用。我的想法是，如果用户互联网出去将这些 aj
java - 用于不同类型子对象的 Libgdx 池
我有一个单位类及其一些子类(弓箭手、剑客等)。我怎样才能创建一个回收所有单元类型子类的池？最佳答案这是不可能的，因为池只能包含一种特定类型的对象。否则你可能会遇到这样的情况: Pool unitP

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

c++ - 使用 Boost::Fiber 的多个共享工作池