gpt4 book ai didi

c++ - 如何在 ssd-caffe 中加载图像而不是 LMDB

转载 作者:行者123 更新时间:2023-12-02 10:30:55 25 4
gpt4 key购买 nike

我在阅读 ssd-caffe 代码时有一些问题,我真的需要你的帮助。

  • 原生caffe只支持分类,数据读取层常用来读取LMDB数据库和读取图像进行训练
  • 为了支持输入多个标签和输入注释框,我决定使用ssd-caffe,它在原生caffe中增加了一个AnnotatedDataLayer层。这个新添加的图层可以支持多个标签和注释框,但它有局限性。原因是它读取的数据类型还是lmdb;
  • 我们现在需要随机读取数据集的数据,但是根据查询结果,lmdb是B+树结构,只能通过迭代器顺序读取,所以我们想把lmdb改成直接读取图片。但是原生caffe的直读图片不支持多标签和标注框。怎么修改caffe的image_data_layers来支持注解框的输入(可以按照AnnotatedDataLayer的方法解决问题吗)?

  • 笔记:
  • 修改ssd-caffe源码:https://github.com/eric612/MobileNet-YOLO
  • 新增标注框的文件路径:/MobileNet-YOLO/src/caffe/layers/annotated_data_layer.cpp
  • 直接读取图片的原生caffe文件路径:/MobileNet-YOLO/src/caffe/layers/image_data_layer.cpp
  • 最佳答案

    数据层提供从硬盘异步读取随机数据的可能性(它使用 2 个线程:一个线程读取,另一个将数据传递到神经网络)。您的顶级 blob 由数据和标签组成。不幸的是,标签是一维的。为了解决这个问题,可以按特殊顺序组织我们的 lmdb 数据库。然后,当我们读取数据时,在将其传递给神经网络之前,我们对其进行转换以使其适应我们的问题。下面我通过一个例子来展示:首先我将编写一个 LMDB 数据库,其中包含 10 个不同的图像(它是相同的图像,但我们假设它们是不同的)、10 个随机边界框和 10 个维度为 3 的随机标签。

    注意 : 要重现以下代码,您必须安装 caffe。如果您只编译了 caffe 文件夹,则在 中创建文件夹root_caffe/examples/new_folder , 把代码放在那里然后编译制作 .

    #include <caffe/caffe.hpp>
    #include "caffe/proto/caffe.pb.h"
    #include "caffe/util/db.hpp"
    #include "boost/scoped_ptr.hpp"
    #include <opencv2/imgcodecs.hpp>
    #include <iostream>
    #include <stdlib.h>


    using namespace caffe;
    using boost::scoped_ptr;


    // Generates max_num_bbx random bounding boxes, returned as a flat vector of
    // 4*max_num_bbx floats laid out [x1, y1, x2, y2] per box. Each box's corner
    // coordinates are drawn uniformly from [0, 1] and the whole box is then
    // multiplied by a random scale in [0, 500], so x2 >= x1 and y2 >= y1 always
    // hold. Uses rand(), so results depend on the current srand() seed.
    std::vector<float> generate_random_boxes(const int max_num_bbx){

    std::vector<float> bbx(4*max_num_bbx);

    for(int i = 0; i < max_num_bbx; i++){

    float scale = 500*static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
    float x1 = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
    float y1 = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
    float x2 = x1 + static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
    // BUG FIX: the original computed y2 from x1 (copy-paste error), which made
    // the bottom edge of the box unrelated to its top edge; y2 must extend y1.
    float y2 = y1 + static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
    bbx[i*4] = scale*x1;
    bbx[i*4 + 1] = scale*y1;
    bbx[i*4 + 2] = scale*x2;
    bbx[i*4 + 3] = scale*y2;

    }

    return bbx;
    }

    // Produces dim_label random label components in [0, 1] for each of the
    // max_num_bbx boxes, flattened row-major (box index outer, label dim inner)
    // into a vector of dim_label*max_num_bbx floats. Uses rand(), so results
    // depend on the current srand() seed.
    std::vector<float> generate_random_labels(const int dim_label, const int max_num_bbx){

    const int total = dim_label*max_num_bbx;
    std::vector<float> labels(total);

    // A single flat loop fills the same slots, in the same order, as the
    // nested (box, dim) loops would: index k maps to box k/dim_label,
    // component k%dim_label.
    for(int k = 0; k < total; k++){
    labels[k] = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
    }

    return labels;
    }


    // Builds a toy LMDB database of n samples for multi-label / bounding-box
    // training. Each sample is serialized as one Datum of shape 1 x 1 x sz whose
    // float_data concatenates, in this exact order: planar BGR image pixels,
    // then 4*max_num_bbx box coordinates, then dim_label*max_num_bbx label
    // values. The reader program's Slice layer relies on this exact ordering.
    int main(){

    // root_path must already contain "image.jpg"; the lmdb subfolder is rebuilt.
    const std::string root_path = "/path/for/test/";
    const std::string path_lmdb = root_path + "lmdb";
    // Delete any previous database so db::Open with db::NEW starts clean.
    std::string rm_lmdb = std::string("rm -rf ") + path_lmdb.c_str();
    system(rm_lmdb.c_str());
    scoped_ptr<db::DB> db(db::GetDB("lmdb"));
    db->Open(path_lmdb, db::NEW);
    scoped_ptr<db::Transaction> txn(db->NewTransaction());


    // Dataset parameters: n samples, up to max_num_bbx boxes per image,
    // dim_label label components per box.
    int n = 10;
    int max_num_bbx = 7;
    int dim_label = 3;
    cv::Mat aux_img = cv::imread(root_path + "image.jpg");
    int rows = aux_img.rows;
    int cols = aux_img.cols;

    std::vector<cv::Mat> vec_img(n);
    std::vector< std::vector<float> > vec_bbx(n);
    std::vector< std::vector<float> > vec_label(n);

    // Reuse the same image n times, pairing it with fresh random boxes/labels
    // to simulate n distinct annotated samples.
    for(int i = 0; i < n; i++){

    vec_img[i] = aux_img.clone();
    vec_bbx[i] = generate_random_boxes(max_num_bbx);
    vec_label[i] = generate_random_labels(dim_label, max_num_bbx);

    }

    for(int i = 0; i< n; i++){

    // Total flat width of one sample: pixels + box coords + labels.
    int sz = 3*rows*cols + 4*max_num_bbx + dim_label*max_num_bbx;

    // Everything is packed into a single 1x1xsz Datum so a downstream Slice
    // layer can split it back into image channels, boxes, and labels.
    Datum datum;
    datum.set_label(0); // unused: the real labels live inside float_data
    datum.set_channels(1);
    datum.set_height(1);
    datum.set_width(sz);

    google::protobuf::RepeatedField<float>* datumFloatData = datum.mutable_float_data();

    //store images (planar order: all B, then all G, then all R, row-major)
    cv::Mat img = vec_img[i];
    for(int d = 0; d < 3; d++){ //BGR
    for(int r = 0; r < rows; r++){
    for(int c = 0; c < cols; c++){

    cv::Vec3b pixel = img.at<cv::Vec3b>(r, c);
    datumFloatData->Add(float(pixel[d]));

    }
    }
    }


    //store bounding-boxes: 4 floats (x1, y1, x2, y2) per box
    std::vector<float>& bbx = vec_bbx[i];
    for(int j = 0; j < 4*max_num_bbx; j++)
    datumFloatData->Add(bbx[j]);

    //store labels: dim_label floats per box
    std::vector<float>& label = vec_label[i];
    for(int j = 0; j < dim_label*max_num_bbx; j++)
    datumFloatData->Add(label[j]);


    //store lmdb: serialize the Datum and commit one transaction per sample;
    //a committed transaction cannot be reused, so a fresh one is created.
    std::string key_str = caffe::format_int(i);
    std::string out;
    CHECK(datum.SerializeToString(&out));
    txn->Put(key_str, out);
    txn->Commit();
    txn.reset(db->NewTransaction());
    std::cout<<"save data: "<<i<<std::endl;


    }

    return 0;

    }

    然后在文件夹 "/path/for/test"我们将有一个名为 的文件夹lmdb 包含我们的数据库。现在我们必须读取数据并按所需的顺序组织它。为此,我将使用 切片层,它允许将输入的底部数据分成多个顶部。因此,由这批图像、边界框和标签组成的输入数据将分为 5 个顶部 blob:img_b、img_g、img_r、bbx、标签。
    #include <caffe/caffe.hpp>

    #include <opencv2/imgcodecs.hpp>
    #include <opencv2/core.hpp>
    #include <opencv2/imgcodecs.hpp>
    #include <opencv2/highgui.hpp>
    #include <opencv2/imgproc/imgproc.hpp>
    #include "boost/scoped_ptr.hpp"
    #include <iostream>
    #include <stdio.h>
    #include <stdlib.h>

    using namespace caffe;
    using boost::scoped_ptr;

    // Reads the toy LMDB written by the companion program and splits each flat
    // 1x1xsz sample back into image channels, bounding boxes, and labels using
    // a Data layer followed by a Slice layer, then visualizes the first sample.
    // The slice points below must mirror the writer's packing order exactly.
    int main(){


    const std::string root_path = "/path/for/test/";
    const std::string path_lmdb = root_path + "lmdb";


    //parameters used to store lmdb data base
    // NOTE(review): these must match the values used by the writer program and
    // the dimensions of the same image.jpg, or the slice points will be wrong.
    int n = 10;
    int max_num_bbx = 7;
    int dim_label = 3;
    cv::Mat aux_img = cv::imread(root_path + "image.jpg");
    int rows = aux_img.rows;
    int cols = aux_img.cols;


    //here we build the network input

    NetParameter net_param;

    // Data layer: asynchronously reads Datum records from the LMDB and emits
    // them as a single "data" blob of shape (batch, 1, 1, sz).
    LayerParameter* db_layer_param = net_param.add_layer();
    db_layer_param->set_name("data");
    db_layer_param->set_type("Data");
    DataParameter* db_data_param = db_layer_param->mutable_data_param();

    db_data_param->set_batch_size(2);
    db_data_param->set_prefetch(3);


    db_data_param->set_source(path_lmdb);
    db_data_param->set_backend(DataParameter_DB_LMDB);


    db_layer_param->add_top("data");

    // Slice layer: cuts the flat sample along axis 3 (width) at the boundaries
    // fixed by the writer's packing: B pixels | G pixels | R pixels | boxes | labels.
    LayerParameter* slice_layer_param = net_param.add_layer();
    slice_layer_param->set_name("slice");
    slice_layer_param->set_type("Slice");
    slice_layer_param->mutable_slice_param()->set_axis(3);//starting B
    slice_layer_param->mutable_slice_param()->add_slice_point(rows*cols);//starting G
    slice_layer_param->mutable_slice_param()->add_slice_point(2*rows*cols);//starting R
    slice_layer_param->mutable_slice_param()->add_slice_point(3*rows*cols);//starting bbx
    slice_layer_param->mutable_slice_param()->add_slice_point(3*rows*cols + 4*max_num_bbx);//starting labels


    slice_layer_param->add_bottom("data");

    slice_layer_param->add_top("img_b");
    slice_layer_param->add_top("img_g");
    slice_layer_param->add_top("img_r");
    slice_layer_param->add_top("bbx");
    slice_layer_param->add_top("labels");


    //NOTE: you must add the additional layers of your model
    /*
    .
    .
    .
    .
    */



    //here we store and load the model
    //NOTE:In this example is not necessary to store the model in prototxt file
    const std::string net_file = root_path + "model.prototxt";
    Net<float> net(net_param);
    WriteProtoToTextFile(net_param,net_file);




    //here we make forward in order to read our data
    net.Forward();



    /*Note that in this example we read 2 images, but then we will only show the first*/

    //read first image: each channel blob holds rows*cols floats in row-major
    //order; index 0 of cpu_data() addresses the first image of the batch.
    boost::shared_ptr< Blob< float > > img_b = net.blob_by_name("img_b");
    boost::shared_ptr< Blob< float > > img_g = net.blob_by_name("img_g");
    boost::shared_ptr< Blob< float > > img_r = net.blob_by_name("img_r");

    cv::Mat img(rows,cols,CV_8UC3);

    for(int r = 0; r < rows; r++){
    for(int c = 0; c < cols; c++){

    img.at<cv::Vec3b>(r,c)[0] = (uchar) img_b->cpu_data()[r*cols + c];
    img.at<cv::Vec3b>(r,c)[1] = (uchar) img_g->cpu_data()[r*cols + c];
    img.at<cv::Vec3b>(r,c)[2] = (uchar) img_r->cpu_data()[r*cols + c];
    }
    }



    //read bounding boxes: 4 floats (x1, y1, x2, y2) per box, as packed by the writer
    boost::shared_ptr< Blob< float > > bbx = net.blob_by_name("bbx");

    for(int i = 0; i < max_num_bbx; i++){

    float x1 = bbx->cpu_data()[4*i];
    float y1 = bbx->cpu_data()[4*i + 1];
    float x2 = bbx->cpu_data()[4*i + 2];
    float y2 = bbx->cpu_data()[4*i + 3];

    // NOTE(review): cv::Point is (x, y) but the coordinates are passed as
    // (y, x) here, which transposes the drawn rectangles -- verify intent.
    cv::Point pt1(y1, x1);
    cv::Point pt2(y2, x2);
    cv::rectangle(img, pt1, pt2, cv::Scalar(0, 255, 0));

    }


    //read labels: dim_label floats per box, row-major by box index
    boost::shared_ptr< Blob< float > > labels = net.blob_by_name("labels");

    std::cout<<"labels: "<<std::endl;
    for(int i = 0; i < max_num_bbx; i++){
    for(int j = 0; j < dim_label; j++){

    std::cout<<labels->cpu_data()[i*dim_label + j]<<" ";

    }
    std::cout<<std::endl;
    }


    // Show the reconstructed first image with its boxes drawn on top.
    cv::imshow("img", img);
    cv::waitKey(0);

    return 0;

    }

    生成的输出如下:

    enter image description here

    使用 WriteProtoToTextFile(net_param, net_file) 生成的 prototxt 文件中，数据层和切片层的内容如下：
    layer {
    name: "data"
    type: "Data"
    top: "data"
    data_param {
    source: "/path/for/test/lmdb"
    batch_size: 2
    backend: LMDB
    prefetch: 3
    }
    }
    layer {
    name: "slice"
    type: "Slice"
    bottom: "data"
    top: "img_b"
    top: "img_g"
    top: "img_r"
    top: "bbx"
    top: "labels"
    slice_param {
    slice_point: 344000
    slice_point: 688000
    slice_point: 1032000
    slice_point: 1032028
    axis: 3
    }
    }

    在切片层之后，您可能还需要添加额外的 reshape 层，把切出的数据整理成后续层所需的形状。

    关于c++ - 如何在 ssd-caffe 中加载图像而不是 LMDB,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/62307905/

    25 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com