c++ - 如果我的 CPU 负载另有建议，我应该启动多个线程吗？-6ren

c++ - 如果我的 CPU 负载另有建议，我应该启动多个线程吗？

转载作者：搜寻专家更新时间：2023-10-31 01:37:23

我正在用 C++ 编写一个程序，用于根据参考注释(reference annotation)统计 NGS 读段(read)的比对数。基本上，该程序将注释文件和比对文件都读入内存，遍历注释，在比对文件中用二分查找定位一个可能的位置，找到该位置后，再对围绕该位置的一个帧(frame，即搜索窗口)进行线性搜索。

通常我想让这个搜索帧(frame)稍微大一点(10000 条比对)，所以我想到把这个帧拆分开，并把各个部分分别放到单独的线程中处理。

一切都能编译并运行，但我的多线程看起来并没有按预期工作，因为我的电脑只使用了一个核心来完成这项工作。有没有人能帮我看看我的线程实现到底哪里出了问题？

https://sourceforge.net/projects/fast-count/?source=directory

#include <iostream>
#include <cstdlib>
#include <vector>
#include <string>
#include <thread>
#include <sstream>
#include <fstream>
#include <math.h> 
#include "api/BamReader.h"

using namespace std;
using namespace BamTools;

// Running hit total for the annotation record currently being processed:
// main() resets it to 0 for each record, process() increments it once per
// matching alignment, and main() prints it after the record is handled.
// It is a plain int with no synchronization.
int hit_count = 0;

// Minimal in-memory record for one alignment read from a BAM file.
// load_bam() fills one of these per mapped alignment.
struct bam_headers{

    std::string chr;   // reference sequence name the alignment belongs to
    int start = 0;     // alignment position as reported by the BAM reader

};

// Per-thread bookkeeping record. It is not referenced by any function in the
// code shown here; the field meanings below are inferred from the names only.
// NOTE(review): confirm intended use or remove.
struct thread_data{

   int thread_id = 0;      // presumably the index of the worker thread
   int total_thread = 0;   // presumably the number of worker threads
   int start_gtf = 0;      // presumably the annotation start coordinate
   int stop_gtf = 0;       // presumably the annotation end coordinate

};

// One annotation record of a GTF file. load_gtf() fills the fields from the
// nine tab-separated columns of a line, in the column order noted below.
struct gtf_headers{

    std::string chr;         // column 1
    std::string source;      // column 2
    std::string feature;     // column 3
    std::string score;       // column 6
    std::string strand;      // column 7
    std::string frame;       // column 8
    std::string annotation;  // column 9
    int start = 0;           // column 4, converted with atoi
    int end = 0;             // column 5, converted with atoi

};

// Counting worker: scans the first `size` entries of `start_holder` and bumps
// the global hit_count once for every value inside the closed interval
// [gtf_start, gtf_stop]. A non-positive `size` scans nothing.
void process(int* start_holder, int size, int gtf_start, int gtf_stop){

    const int* cursor = start_holder;
    for (int left = size; left > 0; --left, ++cursor){
        const int position = *cursor;
        if (position < gtf_start || position > gtf_stop){
            continue;   // outside the annotation interval
        }
        hit_count++;
    }

}

vector <string> find_index(vector <vector <bam_headers> > bams){

    //define vector for bam_index to chromosome

    vector <string> compute_holder;
    for (int bam_idx = 0; bam_idx < bams.size();bam_idx++){
        compute_holder.push_back(bams[bam_idx][0].chr);
    }
    return compute_holder;

}

vector <gtf_headers> load_gtf(char* filename){

    //define matrix to memory holding gtf annotations by assoc. header

    vector<gtf_headers> push_matrix;
    gtf_headers holder;
    ifstream gtf_file(filename);
    string line;

    cout << "Loading GTF to memory" << "\n";
    if (gtf_file.is_open()){
        int sub_count = 0;
        string transfer_hold[8];
        while(getline(gtf_file,line)){
            //iterate through file
            istringstream iss(line);
            string token;
            //iterate through line, and tokenize by tab delimitor
            while(getline(iss,token,'\t')){
                if (sub_count == 8){
                    //assign to hold struct, and push to vector
                    holder.chr = transfer_hold[0];
                    holder.source = transfer_hold[1];
                    holder.feature = transfer_hold[2];
                    holder.start = atoi(transfer_hold[3].c_str());
                    holder.end = atoi(transfer_hold[4].c_str());
                    holder.score = transfer_hold[5];
                    holder.strand = transfer_hold[6];
                    holder.frame = transfer_hold[7];
                    holder.annotation = token;
                    push_matrix.push_back(holder);
                    sub_count = 0;
                } else {
                    //temporarily hold tokens
                    transfer_hold[sub_count] = token;
                    ++sub_count;
                }
            }
        }
        cout << "GTF successfully loaded to memory" << "\n";
        gtf_file.close();
        return(push_matrix);
    }else{
        cout << "GTF unsuccessfully loaded to memory. Check path to file, and annotation format. Exiting" << "\n";
        exit(-1);
    }
}

vector <vector <bam_headers>> load_bam(char* filename){

    //parse individual bam file to chromosome bins

    vector <vector <bam_headers> > push_matrix;
    vector <bam_headers> iter_chr;
    int iter_refid = -1;
    bam_headers bam_holder;
    BamReader reader;
    BamAlignment al;
    const vector<RefData>& references = reader.GetReferenceData();

    cout << "Loading " << filename << " to memory" << "\n";
    if (reader.Open(filename)) {    
        while (reader.GetNextAlignmentCore(al)) {
            if (al.IsMapped()){
                //bam file must be sorted by chr. otherwise the lookup will segfault
                if(al.RefID != iter_refid){
                    //check if chr. position has advanced in the bam file, if true, push empty vector
                    iter_refid++;
                    push_matrix.push_back(iter_chr);
                }else{
                    //if chr. position hasn't advanced push to current index in 2d vector
                    bam_holder.chr = references[al.RefID].RefName;
                    bam_holder.start = al.Position;
                    push_matrix.at(iter_refid).push_back(bam_holder);
                }
            }
        }
        reader.Close();
        cout << "Successfully loaded " << filename << " to memory" << "\n";
        return(push_matrix);
    }else{
        cout << "Could not open input BAM file. Exiting." << endl;
        exit(-1);
    }

}

// Maps a GTF chromosome name to its bin index.
//
// gtf_chr: chromosome name to look up.
// mapping: bin-index -> chromosome-name table.
// Returns the highest index whose name equals gtf_chr, or -1 if none does.
//
// The table is scanned from the back so the search can stop at the first
// match while still returning the same (last) index as a full forward scan.
// The table is taken by const reference; it was previously copied per call.
short int find_bin(const std::string & gtf_chr, const std::vector <std::string> & mapping){

    for (std::size_t i = mapping.size(); i > 0; --i){
        if (mapping[i - 1] == gtf_chr){
            return static_cast<short int>(i - 1);
        }
    }
    return -1;

}

int find_frame(gtf_headers gtf_matrix, vector <bam_headers> bam_file_bin){

    //binary search to find alignment index with greater and less than gtf position

    int bin_size = bam_file_bin.size();
    int high_end = bin_size;
    int low_end = 0;
    int binary_i = bin_size / 2;
    int repeat = 0;
    int frame_start;
    bool found = false;

    while (found != true){
        if ((bam_file_bin[binary_i].start >= gtf_matrix.start) && (bam_file_bin[binary_i].start <= gtf_matrix.end)){
            frame_start = binary_i;
            found = true;
        }else{
            if(repeat != binary_i){
                if(bam_file_bin[binary_i].start > gtf_matrix.end){
                    if(repeat != binary_i){
                        repeat = binary_i;
                        high_end = binary_i;
                        binary_i = ((high_end - low_end) / 2) + low_end;
                    }
                }else{
                    if(repeat != binary_i){
                        repeat = binary_i;
                        low_end = binary_i;
                        binary_i = ((high_end - low_end) / 2) + low_end;
                    }
                }
            }else{
                frame_start = low_end; 
                found = true;
            }
        }   
    }
    return(frame_start);
}

// Computes the [first, last) index window to scan around a centre index.
//
// frame_size:  requested window width in alignments.
// frame_start: centre index of the window.
// bam_matrix:  number of alignments in the bin (upper limit for `last`).
// Returns a two-element vector {first, last}.
//
// The window is frame_start +/- frame_size/2. If it sticks out on the left it
// becomes {0, frame_size}. If it sticks out on the right, `last` is clamped
// to bam_matrix and `first` is moved to bam_matrix - frame_size (not below 0)
// so the window keeps its width. The previous code used
// bam_matrix - frame_size/2 here, which pushed `first` to the right and
// excluded alignments that were inside the unclamped window.
std::vector <int > define_frame(int frame_size, int frame_start, int bam_matrix){

    const int half = frame_size / 2;
    int first = frame_start - half;
    int last = frame_start + half;

    if (first < 0){
        first = 0;
        last = frame_size;
    }
    if (last > bam_matrix){
        last = bam_matrix;
        first = bam_matrix - frame_size;
        if (first < 0){
            first = 0;
        }
    }

    std::vector <int> push_ints;
    push_ints.push_back(first);
    push_ints.push_back(last);
    return push_ints;

}

void thread_handler(int nthread, vector <int> frame, vector <bam_headers> bam_matrix, gtf_headers gtf_record){

    int thread_divide = frame[1]-frame[0];//frame_size / nthread;
    int thread_remain = (frame[1]-frame[0]) % nthread;
    int* start_holder = new int[thread_divide];

    for(int i = 0; i < nthread; i++){
        if (i < nthread - 1){
            for (int frame_index = 0; frame_index < thread_divide; frame_index++){
                 start_holder[frame_index] = bam_matrix[frame[0]+frame_index].start;         
            } 
            frame[0] = frame[0] + thread_divide;
            thread first(process, start_holder,thread_divide,gtf_record.start,gtf_record.end);
            first.join();
        }else{
            for (int frame_index = 0; frame_index < thread_divide + thread_remain; frame_index++){
                 start_holder[frame_index] = bam_matrix[frame[0]+frame_index].start;    
            } 
            thread last(process, start_holder,thread_divide + thread_remain,gtf_record.start,gtf_record.end);
            last.join();
        }
    }

}



int main (int argc, char *argv[])
{

    // usage
    // ./count threads frame_size gtf_file files

    //define matrix to memory holding gtf annotations by assoc. header
    vector <gtf_headers> gtf_matrix = load_gtf(argv[3]);

    //load bam, perform counts
    for(int i = 4;i < argc;i++){

        //iterate through filenames in argv, define matrix to memory holding bam alignments chr and bp position
        vector <vector <bam_headers> > bam_matrix = load_bam(argv[i]);

        //map chromosome to bam matrix index
        vector <string> index_mapping = find_index(bam_matrix);

        //iterate through gtf matrix, find corresponding bins for chr, set search frames, and count
        for(int gtf_i = 0; gtf_i < gtf_i < gtf_matrix.size();gtf_i++){ //gtf_i < gtf_matrix.size()

            hit_count = 0;
            //find corresponding bins for gtf chr
            short int bin_compare = find_bin(gtf_matrix[gtf_i].chr,index_mapping);

            if(bin_compare != -1){

                //find start of search frame
                int frame_start = find_frame(gtf_matrix[gtf_i], bam_matrix[bin_compare]);

                //get up lower bounds of search frame;
                vector <int> full_frame = define_frame(atoi(argv[2]),frame_start,bam_matrix[bin_compare].size());

                //create c array of bam positional data for the frame, and post to thread process
                thread_handler(atoi(argv[1]),full_frame,bam_matrix[bin_compare],gtf_matrix[gtf_i]);

            }

            //counts displayed in STOUT
            cout << gtf_matrix[gtf_i].chr << "\t" << gtf_matrix[gtf_i].source << "\t" << gtf_matrix[gtf_i].feature << "\t" << gtf_matrix[gtf_i].start << "\t" << gtf_matrix[gtf_i].end << "\t" << gtf_matrix[gtf_i].score << "\t" << gtf_matrix[gtf_i].strand << "\t" << gtf_matrix[gtf_i].frame << "\t" << gtf_matrix[gtf_i].annotation << "\t" << hit_count << "\n";

        }
    }
}

最佳答案

你的问题的答案很简单:

thread last(process, start_holder,thread_divide + thread_remain,gtf_record.start,gtf_record.end);
last.join();

在这里，父任务创建了一个新线程，并且...立即等待线程完成。这就是 join() 所做的，它等待线程终止。

因此，您的代码启动一个新线程，并立即等待它完成，然后再做任何其他事情，例如启动下一个线程。

你需要重写 thread_handler() 来实例化所有的 std::thread 实例，然后在实例化所有实例之后，在每个实例上调用 join()，以等待他们全部完成。

典型的做法是：先用 std::thread 的默认构造函数预先创建一个包含所有线程实例的 std::vector，然后遍历它们，逐个进行初始化，最后再次遍历，对每个线程调用 join()。

关于c++ - 如果我的 CPU 负载另有建议，我应该启动多个线程吗？，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/34192171/

文章推荐： c++ - WTL CListViewCtrl getSelectedItem 导致我断言失败

文章推荐： c++ - 功能原型(prototype)制作，程序暂停

文章推荐： c++ - C++ 运算符重载失败

文章推荐： c++ - 将 std::function 作为参数传递给 for_each

.NET 开发人员开始社交网站，建议？
我是一个相对较新的程序员； CS 学士学位，大学毕业大约 2 年，主要使用 C# 中的 .NET。我对 SQL 交互/脚本编写相当流利，并且对 ASP.NET 做了一些工作(主要是维护现有站点)。我
opencv - 动态视频流分析 - 建议？
我计划开发一个简单的解决方案，使我能够即时执行非常基本的视频流分析。我以前从未做过类似的事情，因此这是一个非常笼统和开放的问题。主要重点是检查流是否正常运行，例如 - 卡住帧、黑屏以及音频是否存在。同
关于大型项目的版本控制和避免包含表达式的版本的 Maven 建议
我正在考虑重组一个大型 Maven 项目...... 我们当前结构的基本概述: build [MVN plugins, third party dependency management]:5.1
sql - 查询调优 - 建议
我需要有关附加查询的建议。该查询执行了一个多小时，并根据解释计划进行了全表扫描。我对查询调优还很陌生，希望得到一些建议。首先，为什么我要进行全表扫描，即使我使用的所有列都在其上创建了索引。其次，有
mysql - 一个疯狂的数据库结构 - 建议
我正在做一个项目，我需要在 4 个模型之间创建三个多对多关系。这是它的过程: 常见问题类别可以有许多常见问题子类别，反之亦然。常见问题组可以有许多常见问题的子类别，反之亦然。常见问题可以有许多常见
embedded - 小型嵌入式合成语音库/建议
对于代码大小比语音质量更重要的 PIC 和/或 ARM 嵌入式系统，是否有任何易于使用的免费或廉价的语音合成库？现在似乎 1 meg 的封装被认为是“紧凑的”，但很多微 Controller 都比它小
具有多个有效负载的 Solr 建议
我们正在使用 Solr 建议器功能进行 businessName 查找。当用户输入查询以及匹配的名称时，我们希望 solr 发送来自个人资料的其他属性，如 id、地址、城市、州、国家等字段。我尝试使
Delphi:建议，构建用户界面的想法
我正在构建一个用户界面。我的计划将包括 4 个主要部分: 1) 顶部菜单 - TMainMenu。一个窗口的顶部 2) 主菜单 - TTreeView。一个窗口的左边。 TreeView的每一项=对应
sharepoint - 需要技术推荐/建议
我的公司需要一个任务管理系统来处理从“为X购买一台计算机”到“将一个人转移到另一个国家”这样简单的场景。简单的场景是由一个人处理的单个任务，而更大的任务可以分解为在工作流程中委派给多个人的多个子任务。
marklogic - 内存使用规划 - 建议？
MarkLogic 服务器的林大小与实际内存的建议比率是多少？例如，我目前有一个 190GB 的数据库，并且该数据库随着时间的推移而不断增长。由于数据库会不断增长，我最终需要对该数据库进行集群。因此，
audio - 关于如何解码数据包的线索，建议
去年我收到了一个礼物，它是一个索尼 CMT700Ni 音频站，支持 wifi。它还具有类似于广播的功能，称为“PartyStreaming”。我目前正在挖掘内部，探索它，所以也许我可以结束拥有自己的“
nlp - 如何选择特征选择算法？ - 建议
有没有我可以阅读的研究论文/书籍可以告诉我针对手头的问题哪种特征选择算法最有效。我试图简单地将 Twitter 消息识别为 pos/neg(首先)。我从基于频率的特征选择开始(从 NLTK 书开始)
.net - 需要技术推荐/建议
关闭。这个问题不符合Stack Overflow guidelines .它目前不接受答案。要求我们推荐或查找工具、库或最喜欢的场外资源的问题对于 Stack Overflow 来说是偏离主题的，
java - jUnit - 建议
我正在浏览 stackoverflow 以查找有关使用 jUnit 进行测试的常见建议，但仍然有几个问题。我知道，如果要测试的方法很复杂，最好的方法是将其分成小的单独部分并测试每个部分。但问题是 -
Java Collection 建议
我有一个方法如下 public List> categorize(List customClass){ List> returnValue = new ArrayList<>();
svn - 需要关于使用分支和合并回主干的帮助/建议
我的问题是，当按照下面的程序合并时，在最佳实践场景中，“将分支折叠回主干”程序的最后一步是正确的方法吗？我已经使用 svn 很多年了。在我的个人项目中，我总是毫不犹豫地在主干上愉快地进行修改，并且在
iphone - UINavigationController 建议
我读过 UINavigationController当您想从 n 个屏幕跳转到第一个屏幕时，这是最佳选择。这样做需要以下代码: NSMutableArray *array=[[NSMutableArr
java - 文件输入帮助/建议
我有一个文件输入类。它在构造函数中有一个字符串参数来加载提供的文件名。但是，如果文件不存在，它就会退出。如果文件不存在，我希望它输出一条消息 - 但不确定如何...... 这是类(class): pu
flash - 交互式世界地图 - 建议？
我希望创建一个“您访问过的国家/地区” map - 就像您可能在 Facebook、TravelAdvisor 和诸如此类的网站上看到的那样。我尝试过不同的闪光灯套件，但它们并不像我希望的那样先进。
Perl 建议 - 接收文件并更改内容
我需要一些关于如何处理我想用 Perl 编写的脚本的建议。基本上我有一个看起来像这样的文件: id: 1 Relationship: "" name: shelby pet: 1

搜寻专家

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

c++ - 如果我的 CPU 负载另有建议，我应该启动多个线程吗？